Installation

  1. Install R
  2. Install RTools if you are on Windows
  3. Install RStudio

For more details, see Software and Package Versions.

Running This Code

  1. Ensure the installation steps above are completed
  2. Download a zip of the code and data here and unzip it
  3. In RStudio, open the src/src.Rproj file
  4. Then, open the src/index.Rmd file
  5. In RStudio:
    • Run all code: Click the Run drop down (top right of the code pane) and click Run All
    • Generate HTML version: Click knit (top left of code pane) and a file will be generated in docs/index.html

Libraries

Install R packages if needed.

# Required packages
required_packages <- c(
    "rmarkdown",
    "bookdown",
    "knitr",
    "tidyverse",
    "glue",
    "readxl",
    "ggtext",
    "scales",
    "patchwork",
    "DiagrammeR",
    "DiagrammeRsvg",
    "webshot2",
    "magick",
    "rsvg",
    "sf",
    "tmap",
    "ggspatial",
    "prettymapr",
    "units"
)

# Try to install packages if not installed
default_options <- options()
tryCatch(
    {
        # Disable interactivity
        options(install.packages.compile.from.source = "always")
        
        # Install package if not installed
        for (package in required_packages) {
            is_package_installed <- require(package, character.only = TRUE)
            if (!is_package_installed & package != "osmplotr") {
                cat(paste0("Installing package: ", package, "\n"))
                install.packages(package)
            } else {
                cat(paste0("Package already installed: ", package, "\n"))
            }
        }
    },
    error = function(cond) {
        stop(cond)
    },
    finally = {
        options(default_options) # reset interactivity
    }
)

Load R libraries.

library(DiagrammeR)
library(DiagrammeRsvg)
library(ggtext)
library(glue)
library(patchwork)
library(readxl)
library(rsvg)
library(sf)
library(tidyverse)
library(tmap)
library(ggspatial)
library(units)

Settings

settings <- list()

# Infrastructure types in order
settings$type_recode_infra <- c(
    PBL = "Cycle Track",
    BUF = "Buffered Lane",
    PL = "Painted Lane",
    LSB = "Local Street\nBikeway"
)

# Infrastructure types to remove
settings$type_filter_infra <- c("N", "None", "SR")

# Road types in order
settings$type_recode_road <- c(
    Arterial = "Arterial",
    Collector = "Collector",
    Local = "Local"
)

# Column references
settings$year_col_road <- "verify_install_year"
settings$type_col_road <- "road_type_recode"
settings$type_col_infra <- "verify_install_type"

# Set years of interest
settings$year_min <- 2009
settings$year_max <- 2022

# Plot settings
settings$line_year <- 2019
settings$basemaps <- c(
    "CartoDB.Positron",
    "CartoDB.DarkMatter",
    "Esri.WorldGrayCanvas"
)

# Map infrastructure changes since year
settings$infra_changes_year <- 2020

# Apply map settings
tmap_options(basemaps = settings$basemaps)

Functions

Function 1: calc_yearly_len

Calculate yearly road lengths.

The following function calculates yearly road lengths by infrastructure type using cumulative sums and filling in missing years and types.

For a given infrastructure type, the total road length for a given year is expressed below:

\[ length_{year,type} = f(year,type) = \sum_{i=year_{min}}^{year}l_{i, type}\ \mid\ l_{i, type} \geq 0 \]

Where:

  • \(year\) is the given year
  • \(type\) is the infrastructure type
  • \(year_{min}\) is the earliest year available in the data
  • \(l_{i,type}\) is the road length \(l\) for previous years \(i\) and infrastructure \(j\)
  • \(l_{i,type}\) is set to 0 if there is no data
#' Calculate Yearly Road Lengths By Infrastructure Type
#' 
#' Calculates the cumulative yearly road lengths by infrastructure type without considering infrastructure changes.
#'
#' @param df A data.frame with three columns containing the year, type, and road lengths.
#' @param year_col The name (char) or index (int) of the column containing the years.
#' @param type_col The name (char) or index (int) of the column containing the infrastructure type
#' @param len_col The name (char) or index (int) of the column containing the road lengths.
#' @param out_col The name (char) of the column containing the calculated yearly road lengths by type.
#'
#' @return A data.frame with three columns containing the year, type, and calculated yearly road lengths by type.
#' @export
#'
calc_yearly_len <- function(
        df,
        year_col = "verify_install_year",
        type_col = "verify_install_type",
        len_col = "geometry_len_km",
        out_col = "len",
        year_min = settings$year_min,
        year_max = settings$year_max
    ) {
    
    # Convert data types
    df <- as.data.frame(df)
    df[[year_col]] <- as.integer(df[[year_col]])
    df[[type_col]] <- as.character(df[[type_col]])
    df[[len_col]] <- as.numeric(df[[len_col]])
    
    # Remove rows with empty type
    out <- df %>% filter(
        !is.na(.data[[type_col]])
    )
    
    # Filter to min and max years
    if (year_min > 0) {
        df <- df %>% filter(
            .data[[year_col]] >= year_min
        )
    } else {
        year_min <- min(out[[year_col]], na.rm = TRUE)
    }
    if (year_max > 0) {
        df <- df %>% filter(
            .data[[year_col]] <= year_max
        )
    } else {
        year_max <- max(out[[year_col]], na.rm = TRUE)
    }
    
    # Add dummy len for each type and year combo
    # Covers cases where type and year combo does not exist
    # E.g. No new PL installs in 2021, hence a record PL in 2021 does not exist
    type_uniq <- unique(out[[type_col]])
    type_n <- length(type_uniq)
    year_uniq <- year_min:year_max
    year_n <- length(year_uniq)
    out <- out %>% add_row(
        !!year_col := rep(year_uniq, each = type_n),
        !!type_col := rep(type_uniq, year_n),
        !!len_col := rep(0, type_n * year_n)
    )
    
    # Calc cumsum for each non-empty type ordered by year
    out <- out %>%
        arrange(.data[[year_col]]) %>%
        group_by(.data[[type_col]]) %>%
        mutate(
            !!out_col := cumsum(.data[[len_col]])
        )

    # Get the last cumsum for each year and type
    out <- out %>%
        group_by(.data[[year_col]], .data[[type_col]]) %>%
        arrange(desc(row_number())) %>%
        slice(1)
    
    # Return only the columns spec
    out <- out %>% select(c(
            year_col,
            type_col,
            out_col
        ))
    return(out)
}

Function 2: calc_yearly_adj_len

Calculate yearly adjusted road length.

The following function calculates yearly adjusted road lengths by infrastructure type using cumulative sums and filling in missing years and types.

For a given infrastructure type, the total adjusted road length for a given year is expressed below:

\[ length_{year,type}^{install} + length_{year,type}^{change_i} - length_{year,type}^{replacement_i} \] Where:

  • \(length_{year,type}^{install}\) are the yearly cumulative road lengths for an infrastructure \(type\) installation
  • \(length_{year,type}^{change_i}\) are the yearly cumulative road lengths for an infrastructure \(type\) change in order \(i\)
  • \(length_{year,type}^{replacement_i}\) are the yearly cumulative road lengths for an infrastructure \(type\) replaced by change in order \(i\)
#' Calculate Yearly Adjusted Road Lengths By Infrastructure Type
#' 
#' Calculates the cumulative yearly adjusted road lengths by infrastructure type accounting for installations and subsequent changes.
#'
#' @param df A data.frame with three columns containing the year, type, and road lengths.
#' @param year_cols A vector of the names (char) or indices (int) of the columns containing the years of installations followed by infrastructure changes in order.
#' @param type_cols A vector of the names (char) or indices (int) of the columns containing the infrastructure types of installations followed by infrastructure changes in order.
#' @param type_col The name (char) of the column containing the type.
#' @param len_cols A vector of the names (char) or indices (int) of the columns containing the road lengths of installations followed by infrastructure changes in order.
#' @param out_cols The name (char) of the column containing the calculated yearly road lengths by type.
#' @param out_col The name (char) of the column containing the calculated yearly adjusted road lengths by type.
#' @param repl_suffix A suffix (char) to append to the columns representing the road lengths of replaced infrastructure types from changes.
#' @param ... Additional arguments passed to calc_yearly_len.
#' 
#' @return A data.frame with columns containing the year, type, cumulative road lengths of installations, changes, and replacements, and calculated yearly adjusted road lengths by type.
#' @export
#'
calc_yearly_adj_len <- function(
        df,
        year_cols = c("verify_install_year", "verify_upgrade1_year", "verify_upgrade2_year"),
        type_cols = c("verify_install_type", "verify_upgrade1_type", "verify_upgrade2_type"),
        type_col = "type",
        len_cols = "geometry_len_km",
        out_cols = c("install_len", "upgrade1_len", "upgrade2_len"),
        out_col = "adj_len",
        repl_suffix = "_replaced",
        ...
    ) {
    
    # Ensure df
    df <- as.data.frame(df)
    
    # Convert len_col if char
    len_cols <- rep(len_cols, length(year_cols))
    
    # Check cols same size
    year_cols_n <- length(year_cols)
    type_cols_n <- length(type_cols)
    len_cols_n <- length(len_cols)
    out_cols_n <- length(out_cols)
    if (length(unique(c(year_cols_n, type_cols_n, len_cols_n, out_cols_n))) != 1) {
        stop(glue(
            "The arguments 'year_cols' ({year_cols_n}), 'type_cols' ({type_cols_n}), 'len_cols' ({len_cols_n}), and 'out_cols' ({out_cols_n}) must be the same length."
        ))
    }
    
    # Calc yearly lens by infra type per install or change
    out <- list()
    for (i in 1:length(year_cols)) {
        
        # Get year, type, and len cols
        ycol <- year_cols[[i]]
        tcol <- type_cols[[i]]
        lcol <- len_cols[[i]]
        ocol <- out_cols[[i]]
        
        # Calc yearly len for install or change
        has_infra <- !is.na(df[[tcol]]) %>% all
        if (has_infra) {
            out <- append(
                out,
                calc_yearly_len(
                    df,
                    year_col = ycol,
                    type_col = tcol,
                    len_col = lcol,
                    out_col = ocol,
                    ...
                ) %>%
                    rename(
                        "year" := !!ycol,
                        "type" := !!tcol
                    ) %>% list
            )
        }
        
        # Calc yearly len for replacement
        if (i > 1) {
            
            # Get repl cols
            tcol_repl <- type_cols[[i - 1]]
            lcol_repl <- len_cols[[i - 1]]
            
            # Filter for repl records only where type is not eq to change type
            df_repl <- df %>% filter(.data[[tcol]] != .data[[tcol_repl]])
            
            # Calc repl len if there are any changes
            has_change <- !is.na(df_repl[[tcol]]) %>% all 
            if (has_change) {
                out <- append(
                    out,
                    calc_yearly_len(
                        df_repl,
                        year_col = ycol,
                        type_col = tcol_repl,
                        len_col = lcol_repl,
                        out_col = glue("{ocol}{repl_suffix}"),
                        ...
                    ) %>%
                    rename(
                        "year" := !!ycol,
                        "type" := !!tcol_repl
                    ) %>% list
                )
            }
        }
    }
    
    # Combine all lens in list to single df
    out <- out %>%
        reduce(
            left_join, by = c("year", "type")
        ) %>%
        ungroup()
    
    # Create template for change and repl cols
    change_cols <- paste0(out_cols[2:out_cols_n])# change cols
    change_cols <- c(change_cols, paste0(out_cols[2:out_cols_n], repl_suffix)) # repl cols
    change_cols_add <- rep(0, length(change_cols)) # set default vals
    names(change_cols_add) <- change_cols
    
    # Add change and repl cols set to 0 if not present
    out <- out %>% add_column(
        !!!change_cols_add[setdiff(names(change_cols_add), names(.))]
    )
    
    # Set NA to 0
    out <- out %>% mutate(
        across(everything(), ~replace_na(., 0))
    )
    
    # Calc yearly adj lens by infra type
    out <- out %>%
        mutate( # added len by infra types due to install or changes
            !!out_col := reduce(across(all_of(out_cols)), `+`)
        ) %>%
        mutate( # removed len by infra types due to replacements
            !!out_col := .data[[out_col]] - reduce(
                across(all_of(
                    paste0(out_cols[2:out_cols_n], repl_suffix)
                )),
                `-`
            )
        )
    
    # Rename type col
    out <- out %>% rename(!!type_col := type)
    return(out)
}

Function 3: plot_yearly_len

Plot road lengths by generic types.

This function plots an area chart showing the cumulative road lengths by a user-defined type for each year.

This is a generic function for user-defined types such as infrastructure or road types.

#' Plot Yearly Road Lengths By Type
#' 
#' Creates an area plot of road lengths by category types.
#'
#' @param df A data.frame with three columns containing the year, type, and road lengths.
#' @param title The title (char) of the plot.
#' @param title_underline Set to TRUE to underline the title.
#' @param x_title The title (char) of the x-axis.
#' @param y_title The title (char) of the y-axis.
#' @param y_suffix The suffix (char) to add to the end of y axis values.
#' @param legend_title The title (char) of the legend.
#' @param legend Set to TRUE to include a legend.
#' @param year_col The name (char) or index (int) of the column containing the years.
#' @param year_min The minimum year (int) to display.
#' @param year_max The maximum year (int) to display.
#' @param year_int The year intervals (int) to display. For example, 1 displays every year, and 2 displays every two years.
#' @param len_col The name (char) or index (int) of the column containing the road lengths.
#' @param type_col The name (char) or index (int) of the column containing the type.
#' @param type_filter A vector (char) of types to remove fomr the plot.
#' @param type_recode A named vector (char) of names representing types and values representing the values to replace each type with.
#' @param line_50km Set to TRUE to draw the 50 km red reference line.
#' @param line_year Set to a year (int) to draw a reference line for a year. If FALSE, a line will not be drawn.
#' @param color_low The bottom color (char) of the type.
#' @param color_high The top color (char) of the type.
#' @return An area ggplot of the cumulative yearly road lengths by type.
#' @export
#'
plot_yearly_len <- function(
        df,
        title = "",
        title_underline = TRUE,
        x_title = "",
        y_title = "",
        y_suffix = " km",
        legend_title = "Type",
        legend = TRUE,
        year_col = "year",
        year_min = FALSE,
        year_max = FALSE,
        year_int = 1,
        len_col = "adj_len",
        type_col = "type",
        type_filter = c(),
        type_recode = c(),
        line_50km = FALSE,
        line_year = FALSE,
        color_low = "#DFEBF7",
        color_high = "#3683BB"
) {
    
    # Filter to start and end years
    if (year_min > 0) {
        df <- df %>% filter(
            .data[[year_col]] >= year_min
        )
    }
    if (year_max > 0) {
        df <- df %>% filter(
            .data[[year_col]] <= year_max
        )
    }
    
    # Filter out particular infrastructure types
    if (length(type_filter) > 0) {
        df <- df %>% filter(
            !.data[[type_col]] %in% type_filter
        )
    }
    
    # Recode and reorder category types
    if (length(type_recode) > 0) {
        
        # Reorder category types
        type_uniq <- unique(df[[type_col]])
        type_reorder <- names(type_recode)
        type_reorder <- c(type_reorder, type_uniq[!type_uniq %in% type_reorder])
        df[[type_col]] <- factor(df[[type_col]], levels = type_reorder)
        
        # Recode category types
        df[[type_col]] <- recode(df[[type_col]], !!!type_recode)
    }
    
    # Create fill colors
    type_n <- length(type_uniq)
    type_colors <- scales::seq_gradient_pal(
        color_low,
        color_high
    )(seq(0, 1, length.out = type_n))
    
    # Create base area plot with legend and labels
    len_max <- max(df[[len_col]], na.rm = TRUE)
    year_max <- max(df[[year_col]], na.rm = TRUE)
    out <- ggplot(
        df,
        aes(
            x = .data[[year_col]],
            y = .data[[len_col]],
            fill = .data[[type_col]],
            order = desc(.data[[type_col]])
        )
    ) +
    geom_area(colour = NA, alpha = 0.7) +
    scale_fill_manual(values = type_colors) +
    geom_line(
        position = "stack",
        size = 0.2
    ) +
    labs(
        x = x_title,
        y = y_title,
        fill = legend_title
    ) +
    guides(
        fill = FALSE,
        color = FALSE
    ) +
    scale_x_continuous(
        breaks = seq(year_min, year_max, by = year_int),
        labels = seq(year_min, year_max, by = year_int),
        limits = c(year_min, year_max)
    ) +
    scale_y_continuous(
        label = scales::label_number(suffix = y_suffix)
    ) +
    theme_minimal() +
    theme(
        plot.margin = unit(c(5,5,5,5), "points")
    )
    
    # Add title
    if (title_underline) {
        out <- out + ggtitle(
            bquote(underline(.(title)))
        )
    } else {
        out <- out + ggtitle(title)
    }
    
    # Add legend
    if (legend) {
        out <- out + guides(fill = guide_legend(
            reverse = FALSE,
            override.aes = list(
                alpha = 0.7,
                color = NA,
                shape = NA
            )
        ))
    }
    
    # Add dotted year ref line
    if (line_year) {
        out <- out + geom_vline(
            xintercept = line_year,
            color = "black",
            linetype = "dashed"
        )
    }
    
    # Add red 50km ref line
    if (line_50km) {
        out <- out + geom_segment( # 50km red line
            aes(
                x = 2009,
                y = 0,
                xend = 2009,
                yend = 50,
                color = "#bb0000"
            )
        ) +
        geom_segment( # 50km red triangle point down
            aes(
                x = 2009,
                y = 50.01 - (len_max * 0.05),
                xend = 2009,
                yend = 50 - (len_max * 0.05),
                color = "#bb0000"
            ),
            arrow = arrow(
                length = unit(0.03, "npc"),
                ends = "last",
                type = "closed"
            )
        ) +
        geom_segment( # 50km red triangle point up
            aes(
                x = 2009,
                y = (len_max * 0.05) - 0.01,
                xend = 2009,
                yend = (len_max * 0.05),
                color = "#bb0000"
            ),
            arrow = arrow(
                length = unit(0.03, "npc"),
                ends = "last",
                type = "closed"
            )
        ) +
        annotate(
            "text",
            x = 2009,
            y = 50,
            label = "50km",
            color = "#bb0000",
            hjust = -0.225
        )
    }
    return(out)
}

Function 3a: plot_yearly_len_infra

Plot yearly adjusted road lengths by infrastructure type.

This function plots area charts of yearly road lengths by infrastructure types for a list of data.

This uses the plot_yearly_len function.

#' Plot Yearly Road Lengths By Infrastructure Type
#' 
#' Creates area plots of road lengths by infrastructure type.
#'
#' @param df_list A list of data.frame containing the install and change years, type, and road segment lengths.
#' @return Multiple area ggplots of the cumulative yearly road lengths by infrastructure type combined with patchwork.
#' @export
#'
plot_yearly_len_infra <- function(df_list) {
    
    # Create infra plots from data
    p <- list()
    for (i in 1:length(df_list)) {
        
        # Get data and plot title
        df <- df_list[[i]]
        ptitle <- names(df_list)[[i]]
        
        # Create and add infra plot to list
        p[[i]] <- calc_yearly_adj_len(df, type_col = settings$type_col_infra) %>%
            plot_yearly_len(
                title = ptitle,
                year_min = settings$year_min,
                year_max = settings$year_max,
                type_col = settings$type_col_infra,
                type_filter = settings$type_filter_infra,
                type_recode = settings$type_recode_infra,
                legend_title = "Infrastructure Type",
                line_50km = TRUE,
                line_year = settings$line_year
            )
    }
    
    # Y-axis title
    y_title <- ggplot() +
        annotate(
            geom = "text",
            x = 1,
            y = 1,
            label = "Total Length (Centreline km)",
            angle = 90,
            size = 5
        ) +
        coord_cartesian(clip = "off")+
        theme_void()
    
    # Combine all infra plots together
    out <- (y_title | wrap_plots(p, nrow = length(p))) +
        plot_annotation(
            title = "Roadways with Dedicated Cycling Infrastructure",
            caption = sprintf("Years (%s-%s)", settings$year_min, settings$year_max),
            theme = theme(
                plot.title = element_text(hjust = 0.5, size = 16),
                plot.caption = element_text(hjust = 0.5, size = 14)
            )
        ) +
        plot_layout(widths = c(0.05, 1))
    return(out)
}

Function 3b: plot_yearly_len_road

Plots yearly adjusted road lengths for road types.

This function plots area charts of yearly road lengths by overall road type and by infrastructure separated by each road type.

This uses the plot_yearly_len function.

#' Plot Yearly Road Lengths By Road Type
#'
#' Creates area plots of road lengths by overall road type, and by infrastructure per road type.
#'
#' @param df The data.frame containing the install and change years, type, and road segment types and lengths. 
#' @return Multiple area ggplots of the cumulative yearly road lengths by road type combined with patchwork.
#' @export
#'
plot_yearly_len_road <- function(df, title = "Roadways with Dedicated Cycling Infrastructure") {
    
    # Create list to store plots
    p <- list()

    # Plot overall road types
    p[[1]] <- calc_yearly_len(
        df,
        year_col = settings$year_col_road,
        type_col = settings$type_col_road
    ) %>%
        plot_yearly_len(
            title = title,
            title_underline = FALSE,
            year_col = settings$year_col_road,
            year_min = settings$year_min,
            year_max = settings$year_max,
            x_title = sprintf("Years (%s-%s)", settings$year_min, settings$year_max),
            y_title = "Total Length (Centreline km)",
            legend_title = "Roadway Type",
            type_col = settings$type_col_road,
            type_recode = settings$type_recode_road,
            len_col = "len",
            line_50km = FALSE,
            line_year = settings$line_year,
            color_low = "#C1DDB3",
            color_high = "#297A22"
        ) +
        theme(
            plot.title = element_text(size = 18),
            plot.margin = margin(0, 0, 0, 0, "pt")
        )
    
    # Plot arterial, collector, and local road by infra
    rtypes <- c("Arterial", "Collector", "Local")
    for (i in 1:length(rtypes)) {
        
        # Get road type
        r <- rtypes[i]
        
        # Create infra plot for road type
        p[[i + 1]] <- calc_yearly_adj_len(
            df %>% filter(.data[[settings$type_col_road]] == r),
            type_col = settings$type_col_infra
        ) %>%
            plot_yearly_len(
                title = sprintf("%s Roadways", r),
                title_underline = FALSE,
                line_50km = FALSE,
                line_year = settings$line_year,
                year_int = 2,
                x_title = sprintf("Years (%s-%s)", settings$year_min, settings$year_max),
                y_title = "Total Length (Centreline km)",
                year_min = settings$year_min,
                year_max = settings$year_max,
                type_col = settings$type_col_infra,
                type_filter = settings$type_filter_infra,
                type_recode = settings$type_recode_infra,
                legend_title = "Infrastructure Type"
            ) +
            theme(
                plot.title = element_text(size = 14),
                plot.margin = margin(0, 12, 0, 0, "pt")
            )
    }
    
    # Plot horizontal gradient bar
    grad_bar <-  ggplot(data.frame(x = 1:4), aes(x = x, y = 1, color = x)) +
        geom_line(size = 4) +
        scale_color_gradient(low = "#C1DDB3", high = "#297A22") +
        theme_void() +
        guides(color = FALSE) +
        theme(
            axis.title = element_blank(),
            axis.text = element_blank(),
            axis.ticks = element_blank(),
            axis.line = element_blank(),
            plot.margin = margin(0, 0, 0, 0, "pt")
        )
    
    # Plot overall and road type plots together
    out <- ( # overall plot
        plot_spacer() +
        p[[1]] +
        plot_spacer() +
        plot_layout(
            widths = c(0.25, 0.35, 0.2)
        )
    ) / ( # gradient bar
        plot_spacer() +
        grad_bar +
        plot_spacer() +
        plot_layout(widths = c(-0.8, 10, -1.1))
    ) / ( # infra plots
        p[[2]] +
        p[[3]] +
        p[[4]]
    ) + plot_layout(
        heights = c(12, 1, 8)
    ) + plot_annotation( # A B tags
        tag_levels = list(c("A", "", "B", "", ""))
    ) & theme(
        plot.tag = element_text(face = "bold", size = 12)
    )
    return(out)
}

Function 4: plot_yearly_diff

Plots differences between two years.

This function plots a bar chart of differences between two columns containing years.

This function is used to check the differences in installation years between the city’s data and the verified data.

#' Plot Yearly Differences
#'
#' Creates a bar plot of the differences between two years.
#'
#' @param df The data.frame containing the two columns with the years.
#' @param year_col1 The name (char) or index (int) of the first year column.
#' @param year_col2 The name (char) or index (int) of the second year column to be subtracted from.
#' @param year_col1_name The name alias (char) of the first year column year_col1.
#' @param year_col2_name The name alias (char) of the second year column year_col2.
#' @param year_min The minimum year (int) to calculate differences for.
#' @param year_max The maximum year (int) to calculate differences for.
#' @param title The title (char) of the plot.
#' @param title_n Set to TRUE to add the number of total segments considered.
#' @param x_title The title (char) of the x-axis.
#' @param y_title The title (char) of the y-axis.
#' @param x_breaks The number (int) of breaks to show on the x-axis. Set to FALSE to let ggplot automatically decide.
#' @param x_perc Set to TRUE to show proportions and FALSE to show counts.
#' @param out_data Set to TRUE to return a list
#' 
#' @return A ggplot of yearly differences (year_col2 - year_col1), displaying the proportion of rows for each difference in years. If `out_data` is TRUE then returns a list with keys `data` representing the data used for plotting and `plot` with the ggplot object.
#' @export
#'
plot_yearly_diff <- function(
        df,
        year_col1 = "install_year",
        year_col2 = "verify_install_year",
        year_col1_name = "City Year",
        year_col2_name = "Verified Year",
        year_min = settings$year_min,
        year_max = settings$year_max,
        title = sprintf(
            "Difference in Years, Comparing %s and %s",
            year_col1_name,
            year_col2_name
        ),
        title_n = TRUE,
        x_title = sprintf(
            "Difference in Years (%s - %s)",
            year_col2_name,
            year_col1_name
        ),
        y_title = "Proportion of Total Segments",
        x_breaks = 15,
        x_perc = TRUE,
        out_data = FALSE
) {
    
    # Filter for comparable rows only
    pdata <- df %>% filter(
        !is.na(.data[[year_col1]]) & !is.na(.data[[year_col2]])
    )
    
    # Filter within min year
    if (year_min) {
        pdata <- pdata %>% filter(
            .data[[year_col2]] > year_min
        )
    }
    
    # Filter within max year
    if (year_max) {
        pdata <- pdata %>% filter(
            .data[[year_col2]] <= year_max
        )
    }
    
    # Add n to title
    if (title_n) {
        title <- sprintf("%s (n=%s)", title, nrow(pdata))
    }
    
    # Calc yearly diff
    pdata <- pdata %>%
        mutate(year_diff = .data[[year_col2]] - .data[[year_col1]]) %>%
        count(year_diff) %>%
        mutate(n_perc = (n / sum(n)) * 100)
    
    # Set to proportions or counts
    pdata$y <- if (x_perc) pdata$n_perc else pdata$n
    
    # Plot yealy diffs
    out <- pdata %>% 
        ggplot(aes(
            x = year_diff,
            y = y
        )) +
        geom_bar(
            stat = "identity",
            color = "#332a94",
            fill = "#c3d5e4",
            width = 1
        ) +
        labs(
            title = title,
            x = x_title,
            y = y_title
        ) +
        theme(
            plot.title = element_text(size = 12)
        )
    
    # Add percentage sign if percentages
    if (x_perc) {
        out <- out +
            scale_y_continuous(
                label = scales::label_number(suffix = "%")
            )
    }
    
    # Set x interval breaks
    if (x_breaks) {
        out <- out + scale_x_continuous(
            breaks = scales::breaks_pretty(x_breaks)
        )
    }
    
    # Returns ggplot obj or list
    if (out_data) {
        out <- list(
            data = pdata,
            plot = out
        )
    }
    return(out)
}

Function 5: filter_criteria

Fitler for segment inclusion criteria

This function applies segment inclusion critieria to a list of data.frames. Optionally creates a data.frame of counts, segment lengths, and other exclusions (duplicates, misclassifications) per inclusion criteria step along with a list of the data.frames after applying the inclusion criteria.

#' Filter for Segment Inclusion Criteria
#'
#' This function applies segment inclusion critieria to a list of data.frames. Optionally creates a data.frame of counts, segment lengths, and other exclusions (duplicates, misclassifications) per inclusion criteria step along with a list of the data.frames after applying the inclusion criteria.
#'
#' @param criteria_data A list of lists, where each list contains the following structure defining the segment inclusion criteria for each city:
#' \itemize{
#'  \item \code{city}: the name (char) of the city (required).
#'  \item \code{data}: the data.frame containing road segments and applicable columns for inclusion criteria filtering (required).
#'  \item \code{data_date}: the date (char) that the data was acquired.
#'  \item \code{infra_col}: the column name (char) of the column containing the dedicated cycling infrastructure types to filter.
#'  \item \code{infra_filter}: the vector of characters of dedicated cycling infrastructure types to include.
#'  \item \code{road_col}: the column name (char) of the column containing the road location types to filter.
#'  \item \code{road_filter}: the vector of characters of road location types to exclude.
#'  \item \code{status_col}: the column name (char) of the column containing the inactive road status types to filter.
#'  \item \code{status_filter}: the vector of characters of inactive road status types to include.
#'  \item \code{geom_col}: the column name (char) of the column containing geometries.
#'  \item \code{geom_unit}: the unit measure (char) of the geometry 
#'  \item \code{geom_filter}: Set to TRUE to filter for null and duplicate geometries.
#'  \item \code{misclass_col}: the column name (char) of the column containing misclassification types to filter.
#'  \item \code{misclass_filter}: the vector of characters indicating non-misclassified rows of data to include. Usually set to c("NA", NA) to indicate that the row is not misclassified.
#'  \item \code{noverify_col}: the column containing infrastructure install types (char) that are not verified. This does not filter the data, but calculates and adjusts for the rows and road lengths of non-verified segments.
#'. \item \code{noverify_filter}: the vector of characters of non-verified infrastructure install types from the city. This does not filter the data, but calculates and adjusts for the rows and road lengths of non-verified segments.
#' }
#' @param len_func A function to apply to road length calculations. The default is a function that converts from meters to km.
#' 
#' @return A list of lists, where each list has keys and values from \code{criteria_data}, and the following additional keys:
#' \itemize{
#'  \item \code{data_filter}: the data.frame after filtering for segment inclusion criteria (required).
#'  \item \code{infra_filter_applied}: TRUE if dedicated cycling infrastructure filter was applied and FALSE otherwise (required).
#'  \item \code{infra_filter_n}: total rows (numeric) after filtering for dedicated cycling infrastructure using \code{infra_filter} (required).
#'  \item \code{infra_filter_len}: total road length (numeric) after filtering for dedicated cycling infrastructure using \code{infra_filter} (required).
#'  \item \code{infra_filter_nx}: total rows (numeric) affected by filtering for dedicated cycling infrastructure using \code{infra_filter} (required).
#'  \item \code{infra_filter_lenx}: total road length (numeric) affected by filtering for dedicated cycling infrastructure using \code{infra_filter} (required).
#'  \item \code{road_filter_applied}: TRUE if road location filter was applied and FALSE otherwise (required).
#'  \item \code{road_filter_n}: total rows (numeric) after filtering for road location using \code{infra_filter} (required).
#'  \item \code{road_filter_len}: total road length (numeric) after filtering for road location using \code{infra_filter} (required).
#'  \item \code{road_filter_nx}: total rows (numeric) affected by filtering for road location using \code{infra_filter} (required).
#'  \item \code{road_filter_lenx}: total road length (numeric) affected by filtering for road location using \code{infra_filter} (required).
#'  \item \code{status_filter_applied}: TRUE if inactive road status filter was applied and FALSE otherwise (required).
#'  \item \code{status_filter_n}: total rows (numeric) after filtering for inactive road status using \code{status_filter} (required).
#'  \item \code{status_filter_len}: total road length (numeric) after filtering for inactive road status using \code{status_filter} (required).
#'  \item \code{status_filter_nx}: total rows (numeric) affected by filtering for inactive road status using \code{status_filter} (required).
#'  \item \code{status_filter_lenx}: total road length (numeric) affected by filtering for inactive road status using \code{status_filter} (required).
#'  \item \code{geom_filter_null_applied}: TRUE if null geometries filter was applied and FALSE otherwise (required).
#'  \item \code{geom_filter_null_n}: total rows (numeric) after filtering for null geometries (required).
#'  \item \code{geom_filter_null_len}: total road length (numeric) after filtering for null geometries (required).
#'  \item \code{geom_filter_null_nx}: total rows (numeric) affected by filtering for null geometries (required).
#'  \item \code{geom_filter_null_lenx}: total road length (numeric) affected by filtering for null geometries (required).
#'  \item \code{geom_filter_dup_applied}: TRUE if duplicate geometries filter was applied and FALSE otherwise (required).
#'  \item \code{geom_filter_dup_n}: total rows (numeric) after filtering for duplicate geometries (required).
#'  \item \code{geom_filter_dup_len}: total road length (numeric) after filtering for duplicate geometries (required).
#'  \item \code{geom_filter_dup_nx}: total rows (numeric) affected by filtering for duplicate geometries (required).
#'  \item \code{geom_filter_dup_lenx}: total road length (numeric) affected by filtering for duplicate geometries (required).
#'  \item \code{elig_n}: total rows (numeric) after the above filters eligible for data entry and screening (required).
#'  \item \code{elig_len}: total road length (numeric) after the above filters eligible for data entry and screening (required).
#'  \item \code{misclass_filter_applied}: TRUE if null misclassifications filter was applied and FALSE otherwise (required).
#'  \item \code{misclass_filter_n}: total rows (numeric) after filtering misclassifications using \code{misclass_filter} (required).
#'  \item \code{misclass_filter_len}: total road length (numeric) after filtering misclassifications using \code{misclass_filter} (required).
#'  \item \code{misclass_filter_nx}: total rows (numeric) affected by filtering misclassifications using \code{misclass_filter} (required).
#'  \item \code{misclass_filter_lenx}: total road length (numeric) affected by misclassifications using \code{misclass_filter} (required).
#'  \item \code{misclass_filter_uniq_n}: a data.frame of total rows for each misclassification type.
#'  \item \code{misclass_filter_uniq_len}: a data.frame of total road lengths for each misclassification type.
#'  \item \code{noverify_filter_applied}: TRUE if non-verified infrastructure filter was calculated and FALSE otherwise (required).
#'  \item \code{noverify_filter_nx}: total rows (numeric) of non-verified infrastructure from \code{noverify_filter} (required).
#'  \item \code{noverify_filter_lenx}: total road length (numeric) affected by non-verified infrastructure using \code{noverify_filter} (required).
#'  \item \code{incl_n}: final total rows (numeric) after the above filters (required).
#'  \item \code{incl_len}: final total road length (numeric) after the above filters (required).
#' }
#' @export
#'
filter_criteria <- function(
    criteria_data,
    len_func = function (x) as.numeric(x) / 1000
) {
    
    # Apply criteria to list and track counts and lengths
    out <- criteria_data
    for (i in 1:length(criteria_data)) {
        
        # Get criteria data
        x <- criteria_data[[i]]
        df <- x$data
        city <- x$city
        
        # Set initial apply status for filters
        out[[city]]$infra_filter_applied <- FALSE
        out[[city]]$road_filter_applied <- FALSE
        out[[city]]$status_filter_applied <- FALSE
        out[[city]]$geom_filter_null_applied <- FALSE
        out[[city]]$geom_filter_dup_applied <- FALSE
        out[[city]]$misclass_filter_applied <- FALSE
        out[[city]]$noverify_filter_applied <- FALSE
        
        # Count/len initial
        out[[city]]$data_n <- nrow(df)
        out[[city]]$data_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE))
        
        # Filter for dedicated cycling infra
        if (all(c("infra_col", "infra_filter") %in% names(x))) {
            
            # Apply ded cyc infra filter
            df <- df %>%
                filter(.data[[x$infra_col]] %in% x$infra_filter)
            
            # Set ded cyc infra filter status
            out[[city]]$infra_filter_applied <- TRUE
        }
        
        # Count/len ded cyc infra filter
        out[[city]]$infra_filter_n <- nrow(df)
        out[[city]]$infra_filter_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE))
        
        # Count/len affected by ded cyc infra filter
        out[[city]]$infra_filter_nx <- out[[city]]$data_n - out[[city]]$infra_filter_n
        out[[city]]$infra_filter_lenx <- out[[city]]$data_len - out[[city]]$infra_filter_len
        
        # Filter for road location
        if (all(c("road_col", "road_filter") %in% names(x))) {
            
            # Apply road filter
            df <- df %>%
                filter(!.data[[x$road_col]] %in% x$road_filter)
            
            # Set road filter status
            out[[city]]$road_filter_applied <- TRUE
        }
        
        # Count/len road filter
        out[[city]]$road_filter_n <- nrow(df)
        out[[city]]$road_filter_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE))
        
        # Count/len affected by road filter
        out[[city]]$road_filter_nx <- out[[city]]$infra_filter_n - out[[city]]$road_filter_n
        out[[city]]$road_filter_lenx <- out[[city]]$infra_filter_len - out[[city]]$road_filter_len
        
        # Filter for status
        if (all(c("status_col", "status_filter") %in% names(x))) {
            
            # Apply status filter
            df <- df %>%
                filter(!.data[[x$status_col]] %in% x$status_filter)
            
            # Set status filter status
            out[[city]]$status_filter_applied <- TRUE
        }
        
        # Count/len status filter
        out[[city]]$status_filter_n <- nrow(df)
        out[[city]]$status_filter_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE))
        
        # Count/len affected by status filter
        out[[city]]$status_filter_nx <- out[[city]]$road_filter_n - out[[city]]$status_filter_n
        out[[city]]$status_filter_lenx <- out[[city]]$road_filter_len - out[[city]]$status_filter_len
        
        # Filter for null geoms
        if (all(c("geom_col", "geom_filter") %in% names(x))) {
            
            # Apply null geom filter
            df <- df %>%
                filter(!is.na(.data[[x$geom_col]]))
            
            # Set dup geom filter status
            out[[city]]$geom_filter_null_applied <- TRUE
        }
        
        # Count/len null geom filter
        out[[city]]$geom_filter_null_n <- nrow(df)
        out[[city]]$geom_filter_null_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE))
        
        # Count/len affected by null geom filter
        out[[city]]$geom_filter_null_nx <- out[[city]]$status_filter_n - out[[city]]$geom_filter_null_n
        out[[city]]$geom_filter_null_lenx <- out[[city]]$status_filter_len - out[[city]]$geom_filter_null_len
            
        # Filter for dup geoms
        if (all(c("geom_col", "geom_filter") %in% names(x))) {
            
            # Apply dup geom filter
            df <- df %>%
                distinct(.data[[x$geom_col]], .keep_all = TRUE)
            
            # Set dup geom filter status
            out[[city]]$geom_filter_dup_applied <- TRUE
        }
        
        # Count/len dupl geom filter
        out[[city]]$geom_filter_dup_n <- nrow(df)
        out[[city]]$geom_filter_dup_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE))
        
        # Count/len affected by dupl geom filter
        out[[city]]$geom_filter_dup_nx <- out[[city]]$geom_filter_null_n - out[[city]]$geom_filter_dup_n
        out[[city]]$geom_filter_dup_lenx <- out[[city]]$geom_filter_null_len - out[[city]]$geom_filter_dup_len
        
        # Calculate noverify segments
        if (all(c("noverify_col", "noverify_filter") %in% names(x))) {
            
            # Apply noverify filter separately
            df_noverify <- df %>%
                filter(!is.na(.data[[x$noverify_col]]))
            
            # Set noverify filter status
            out[[city]]$noverify_filter_applied <- TRUE
            
            # Count/len of noverify segments
            out[[city]]$noverify_filter_nx <- df_noverify %>% nrow
            out[[city]]$noverify_filter_lenx <- len_func(sum(st_length(df_noverify[[x$geom_col]]), na.rm = TRUE))
            
        } else {
            
            # Set to 0 if all segments are verified
            out[[city]]$noverify_filter_nx <- len_func(as_units(0, "meters"))
            out[[city]]$noverify_filter_lenx <- len_func(as_units(0, "meters"))
        }
        
        # Count/len eligible
        out[[city]]$elig_n <- nrow(df) - out[[city]]$noverify_filter_nx
        out[[city]]$elig_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE)) - out[[city]]$noverify_filter_lenx
        
        # Filter for misclass
        if (all(c("misclass_col", "misclass_filter") %in% names(x))) {
            
            # Count/len misclass groups
            out[[city]]$misclass_filter_uniq_n <- df %>%
                filter(!is.na(.data[[x$misclass_col]])) %>%
                count(.data[[x$misclass_col]]) %>%
                as_tibble
            out[[city]]$misclass_filter_uniq_len <- df %>%
                filter(!.data[[x$misclass_col]] %in% x$misclass_filter) %>%
                group_by(.data[[x$misclass_col]]) %>%
                summarize(len = len_func(sum(st_length(.data[[x$geom_col]]), na.rm = TRUE))) %>%
                as_tibble
            
            # Apply misclass filter
            df <- df %>%
                filter(.data[[x$misclass_col]] %in% x$misclass_filter)
            
            # Set misclass filter status
            out[[city]]$misclass_filter_applied <- TRUE
        }
        
        # Count/len misclass filter
        out[[city]]$misclass_filter_n <- nrow(df) - out[[city]]$noverify_filter_nx
        out[[city]]$misclass_filter_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE)) - out[[city]]$noverify_filter_lenx
        
        # Count/len affected by misclass filter
        out[[city]]$misclass_filter_nx <- out[[city]]$elig_n - out[[city]]$misclass_filter_n
        out[[city]]$misclass_filter_lenx <- out[[city]]$elig_len - out[[city]]$misclass_filter_len
        
        # Count/len eligible
        out[[city]]$incl_n <- nrow(df) - out[[city]]$noverify_filter_nx
        out[[city]]$incl_len <- len_func(sum(st_length(df[[x$geom_col]]), na.rm = TRUE)) - out[[city]]$noverify_filter_lenx
        
        # Save filtered data
        out[[city]]$data_filter <- df
    }
    return(out)
}

Function 5a: diag_criteria

Diagram the segment inclusion criteria results.

This function draws a flow diagram of overall methods for segment inclusion criteria using output from filter_criteria.

#' Diagram the segment inclusion criteria results
#' 
#' This function draws a flow diagram of overall methods for segment inclusion criteria using output from \code{\link{filter_criteria}}.
#'
#' @param criteria_data A list of lists in the structure of the output from \code{\link{filter_criteria}}.
#' @param note A note (char) to display at the end of the diagram.
#' @return A \code{\link[DiagrammeR]{grViz}} object.
#' @export
#'
diag_criteria <- function(
        criteria_data,
        note = "*Denotes segments misclassified as an ineligible type (off-street path, shared road, or inactive temporary infrastructure)<br/>**Local Street Bikeways (LSB) were included but not screened or verified as they did not fit the Can-BICS definitions"
) {
    
    # Diag settings
    diag_settings <- "
        rankdir = LR
        node[
            shape = box,
            width = 2.75,
            height = 1.65,
            style = filled,
            fillcolor = white,
            penwidth = 1.5,
            fontname = 'Arial'
        ]
        edge[
            arrowhead = vee,
            arrowtail = vee
        ]
        layout = neato
    "
    
    # Top header row
    row_top <- "
        open_data[
            label = 'Open Data',
            height = 0.5,
            fillcolor = '#d7e9fe',
            pos = '0,1!'
        ]
        elig_data[
            label = 'Eligible Segments',
            height = 0.5,
            fillcolor = '#d7e9fe',
            pos = '3.25,1!'
        ]
        incl_data[
            label = 'Inclusions',
            height = 0.5,
            fillcolor = '#d7e9fe',
            pos = '6.5,1!'
        ]
    "
    
    # Create template for row addition
    row_temp <- "
        open{i}[
            label = <<b>{city}</b><br/>N = {open_n} Segments<br/><i>({open_len})<br/>Downloaded: {open_date}</i>>,
            pos = '0,{y}!'
        ]
        
        elig{i}[
            label = <n = {elig_n} Segments<br/>({elig_len})<i><br/><b>Exclusions</b>{elig_inelig}{elig_dup}{elig_poly}</i>>,
            pos = '3.25,{y}!'
        ]
        
        incl{i}[
            label = <n = {incl_n} Segments<br/><i>({incl_len}){noverify}<br/><b>Exclusions</b>{incl_miss}{incl_dup}</i>>,
            pos = '6.5,{y}!'
        ]
        
        open{i} -> elig{i} -> incl{i}
    "
    
    # Generate row additions per city
    y <- -0.21
    y_gap <- 1.85
    row_adds <- ""
    for (i in 1:length(criteria_data)) {
        
        # Vars per city
        criteria <- criteria_data[[i]]
        
        # Generate geom filter dup info
        elig_dup <- ""
        if (criteria$geom_filter_dup_nx > 0) {
            elig_dup <- glue(
                "<br/>Duplicates: n = {n}",
                n = criteria$geom_filter_dup_nx
            )
        }
        
        # Generate geom filter null info
        elig_poly <- ""
        if (criteria$geom_filter_null_nx > 0) {
            elig_poly <- glue(
                "<br/>No Polyline Data: n = {n}",
                n = criteria$geom_filter_null_nx
            )
        }
        
        # Generate inelig info
        elig_inelig <- glue(
            "<br/>Ineligible: n = {n}",
            n = criteria[["infra_filter_nx"]] + criteria[["status_filter_nx"]] + criteria[["road_filter_nx"]]
        )
        
        # Generate noverify info
        noverify <- ""
        if (criteria$noverify_filter_applied) {
            noverify <- glue(
                "<br/>**Screened: n = {n}<br/>**Not screened: n = {nx}",
                n = criteria$elig_n,
                nx = criteria$noverify_filter_nx
            )
        }
        
        # Generate incl info
        incl_miss <- glue(
            "<br/>*Misclassifications: n = {n}",
            n = criteria[["misclass_filter_nx"]]
        )
        
        # Road length unit
        if ("geom_unit" %in% names(criteria)) {
            len_unit <- criteria$geom_unit
        } else {
            len_unit = "meters"
        }
        
        # Generate single row addition
        row_adds <- paste0(row_adds, glue(
            row_temp,
            i = i,
            y = y,
            city = str_to_title(criteria[["city"]]),
            open_n = criteria[["data_n"]],
            open_len = paste(round(criteria[["data_len"]], 1), len_unit),
            open_date = criteria[["data_date"]],
            elig_n = criteria$elig_n + criteria$noverify_filter_nx,
            elig_len = paste(round(criteria[["elig_len"]], 1), len_unit),
            elig_inelig = elig_inelig,
            elig_dup = elig_dup,
            elig_poly = elig_poly,
            incl_n = criteria[["incl_n"]] + criteria$noverify_filter_nx,
            incl_len = paste(round(criteria[["incl_len"]] + criteria$noverify_filter_lenx, 1), len_unit),
            incl_miss = incl_miss,
            incl_dup = "",
            noverify = noverify
        ))
        
        # Move row below
        y <- y - y_gap
    }
    
    # Filter and screening lines
    line_filter <- glue("
        filter1[
            label = 'Filtering',
            height = 0.25,
            shape = plaintext,
            style='', pos = '1.6,1.425!'
        ]
        filter2[
            style = invis,
            pos = '1.6,{y}!'
        ]
        filter1 -> filter2 [style = dashed, dir = none, color = '#b0b0b0']
    ", y = y - -0.96)
    line_screen <- glue("
        screen1[
            label = 'Screening',
            height = 0.25,
            shape = plaintext,
            style='', pos = '4.85,1.425!'
        ]
        screen2[
            style = invis,
            pos = '4.85,{y}!'
        ]
        screen1 -> screen2 [style = dashed, dir = none, color = '#b0b0b0']
    ", y = y - -0.96)
    
    # Bottom note
    note_bottom <- glue("
        note[
            label=<<i>{text}</i>>,
            style = '',
            shape = plaintext,
            fontsize = 12,
            pos = '3.25,{y}!'
        ]
    ", text = note, y = y - -0.69)
    
    # Generate graphviz diag
    out <- grViz(paste0(
        "digraph {",
        diag_settings,
        row_top,
        row_adds,
        line_filter,
        line_screen,
        note_bottom,
        "}"
    ))
    return(out)
}

Function 5b: diag_criteria_details

Diagram the segment inclusion criteria results in detail.

This function draws a flow diagram of detailed methods for segment inclusion criteria using output from filter_criteria.

#' Diagram the segment inclusion criteria results in detail
#' 
#' This function draws a flow diagram of detailed methods for segment inclusion criteria using output from \code{\link{filter_criteria}}.
#'
#' @param criteria_data A list of lists in the structure of the output from \code{\link{filter_criteria}}.
#' @param city The city (char) to create the diagram for. If `NULL`, this function produces a list of diagrams where keys are the city name and values are the diagrams.
#' @param out_render Set to TRUE to render the diagram and return \code{\link[DiagrammeR]{grViz}} objects or FALSE to return the text used to generate the diagram.
#' @return A list of \code{\link[DiagrammeR]{grViz}} objects if `city` is `NULL`, or a single \code{\link[DiagrammeR]{grViz}} if `city` is provided. The \code{\link[DiagrammeR]{grViz}} objects become text (char) if `out_render` is `FALSE`.
#' @export
#'
diag_criteria_details <- function(criteria_data, city = NULL, out_render = TRUE) {
    
    # Filter for city if avail
    if (!is.null(city)) {
        criteria_data <- criteria_data[sapply(criteria_data, function (x) x$city == city)]
    }
    
    # Generate diagrams for each city
    out <- list()
    for (i in 1:length(criteria_data)) {
        
        # Diag vars
        criteria <- criteria_data[[i]]
        x_edge <- -4
        
        # Diag settings
        diag_settings <- "
            rankdir = TB
            node[
                shape = box
                width = 10
                height = 1.8
                style = filled
                fillcolor = white
                penwidth = 1.5
                fontsize = 16
                fontname = 'Arial'
                margin = 0.25
            ]
            edge[
                arrowhead = vee,
                arrowtail = vee
            ]
            layout = neato
        "
        
        # Step 1 identification
        s1 <- glue("
            id_title[
                label = <<b>Identification</b>>
                pos = '-8.5,0!'
                width = 2
                height = 1.9
                fillcolor = '#d7e9fe'
                style = 'rounded,filled'
            ]
            id[
                label = 'Shapefile from: {url}\\lDownloaded: {date}\\lN = {n} Segments\\l'
                pos = '0,0!'
                width = 14
            ]
            
            id_top[
                style = invis
                pos = '{x},0!'
            ]
            id_bot[
                style = invis
                pos = '{x},-2.25!'
            ]
            id_top -> id_bot
        ",
            url = criteria$data_url,
            date = criteria$data_date,
            n = criteria$data_n,
            x = x_edge
        )
        
        # Step 2 vars
        fi <- 0
        y <- -0
        s2 <- ""
        
        # Step 2 filtering infra
        if (criteria$infra_filter_applied) {
            fi <- fi + 1
            y <- y - 2.25
            s2 <- glue("
                {s2}

                filter{fi}[
                    label = 'Filter for Dedicated Cycling Infrastructure\\l{column} in {filter}\\l(n = {n})\\l'
                    pos = '-2,{y}!'
                ]
                filter{fi}x[
                    label = 'Segments Excluded\\l(n = {nx})\\l'
                    pos = '5.5,{y}!'
                    width = 3
                ]
                filter{fi} -> filter{fi}x
                
                filter{fi}_top[
                    style = invis
                    pos = '{x},{y}!'
                ]
                filter{fi}_bot[
                    style = invis
                    pos = '{x},{y - 2.25}!'
                ]
                filter{fi}_top -> filter{fi}_bot
            ",
                column = criteria$infra_col,
                filter = str_replace_all(
                    str_wrap(
                        paste0(
                            criteria$infra_filter,
                            collapse = ", "
                        ),
                        width = 83
                    ),
                    "[\r\n]",
                    "\\\\l"
                ),
                n = criteria$infra_filter_n,
                nx = criteria$infra_filter_nx,
                fi = fi,
                y = y,
                x = x_edge,
                s2 = s2
            )
        }
        
        # Step 2 filtering road
        if (criteria$road_filter_applied) {
            fi <- fi + 1
            y <- y - 2.25
            s2 <- glue("
                {s2}

                filter{fi}[
                    label = 'Filter for Infrastructure Located on Roadway\\l{column} != {filter}\\l(n = {n})\\l'
                    pos = '-2,{y}!'
                ]
                filter{fi}x[
                    label = 'Segments Excluded\\l(n = {nx})\\l'
                    pos = '5.5,{y}!'
                    width = 3
                ]
                filter{fi} -> filter{fi}x
                
                filter{fi}_top[
                    style = invis
                    pos = '{x},{y}!'
                ]
                filter{fi}_bot[
                    style = invis
                    pos = '{x},{y - 2.25}!'
                ]
                filter{fi}_top -> filter{fi}_bot
            ",
                column = criteria$road_col,
                filter = str_replace_all(
                    str_wrap(
                        paste0(
                            criteria$road_filter,
                            collapse = ", "
                        ),
                        width = 83
                    ),
                    "[\r\n]",
                    "\\\\l"
                ),
                n = criteria$road_filter_n,
                nx = criteria$road_filter_nx,
                fi = fi,
                y = y,
                x = x_edge,
                s2 = s2
            )
        }

        # Step 2 filtering status
        if (criteria$status_filter_applied) {
            fi <- fi + 1
            y <- y - 2.25
            s2 <- glue("
                {s2}

                filter{fi}[
                    label = 'Filter for Active Infrastructure Status\\l{column} != {filter}\\l(n = {n})\\l'
                    pos = '-2,{y}!'
                ]
                filter{fi}x[
                    label = 'Segments Excluded\\l(n = {nx})\\l'
                    pos = '5.5,{y}!'
                    width = 3
                ]
                filter{fi} -> filter{fi}x
                
                filter{fi}_top[
                    style = invis
                    pos = '{x},{y}!'
                ]
                filter{fi}_bot[
                    style = invis
                    pos = '{x},{y - 2.25}!'
                ]
                filter{fi}_top -> filter{fi}_bot
            ",
                column = criteria$status_col,
                filter = str_replace_all(
                    str_wrap(
                        paste0(
                            criteria$status_filter,
                            collapse = ", "
                        ),
                        width = 83
                    ),
                    "[\r\n]",
                    "\\\\l"
                ),
                n = criteria$status_filter_n,
                nx = criteria$status_filter_nx,
                fi = fi,
                y = y,
                x = x_edge,
                s2 = s2
            )
        }

        # Step 2 filtering null geom
        if (criteria$geom_filter_null_applied) {
            fi <- fi + 1
            y <- y - 2.25
            s2 <- glue("
                {s2}

                filter{fi}[
                    label = 'Filter for Null Geometry\\l{column} is not null\\l(n = {n})\\l'
                    pos = '-2,{y}!'
                ]
                filter{fi}x[
                    label = 'Segments Excluded\\l(n = {nx})\\l'
                    pos = '5.5,{y}!'
                    width = 3
                ]
                filter{fi} -> filter{fi}x
                
                filter{fi}_top[
                    style = invis
                    pos = '{x},{y}!'
                ]
                filter{fi}_bot[
                    style = invis
                    pos = '{x},{y - 2.25}!'
                ]
                filter{fi}_top -> filter{fi}_bot
            ",
                column = criteria$geom_col,
                n = criteria$geom_filter_null_n,
                nx = criteria$geom_filter_null_nx,
                fi = fi,
                y = y,
                x = x_edge,
                s2 = s2
            )
        }
        
        # Step 2 filtering dup geom
        if (criteria$geom_filter_dup_applied) {
            fi <- fi + 1
            y <- y - 2.25
            s2 <- glue("
                {s2}

                filter{fi}[
                    label = 'Filter for Duplicate Geometry\\l{column} is not duplicated\\l(n = {n})\\l'
                    pos = '-2,{y}!'
                ]
                filter{fi}x[
                    label = 'Segments Excluded\\l(n = {nx})\\l'
                    pos = '5.5,{y}!'
                    width = 3
                ]
                filter{fi} -> filter{fi}x
                
                filter{fi}_top[
                    style = invis
                    pos = '{x},{y}!'
                ]
                filter{fi}_bot[
                    style = invis
                    pos = '{x},{y - 2.25}!'
                ]
                filter{fi}_top -> filter{fi}_bot
            ",
                column = criteria$geom_col,
                n = criteria$geom_filter_dup_n,
                nx = criteria$geom_filter_dup_nx,
                fi = fi,
                y = y,
                x = x_edge,
                s2 = s2
            )
        }
        
        # Step 2 filtering
        s2 <- glue("
            filter_title[
                label = <<b>Filtering</b>>
                pos = '-8.5,{y}!'
                width = 2
                height = {h}
                fillcolor = '#d7e9fe'
                style = 'rounded,filled'
            ]
            {s2}
        ",
            h = (fi * 2.1),
            fi = fi,
            y = y + if (fi == 1) 0 else (((fi -1) / 2) * 2.25),
            s2 = s2
        )
        
        # Step 3 eligible
        y <- y - 2.25
        s3 <- glue("
            elig_title[
                label = <<b>Eligible</b>>
                pos = '-8.5,{y}!'
                width = 2
                height = 1.9
                fillcolor = '#d7e9fe'
                style = 'rounded,filled'
            ]
            elig[
                label = 'Segments Included for Data Entry and Screening\\l(n = {n})\\l'
                pos = '0,{y}!'
                width = 14
            ]
            
            elig_top[
                style = invis
                pos = '{x},{y}!'
            ]
            elig_bot[
                style = invis
                pos = '{x},{y - 2.25}!'
            ]
            elig_top -> elig_bot
        ",
            n = criteria$elig_n + criteria$noverify_filter_nx,
            y = y,
            x = x_edge
        )
        
        # Step 4 Screening
        s4 <- ""
            
        # Step 4 title
        y <- y - 2.65
        s4 <- glue("
            screen_title[
                label = <<b>Screening</b>>
                pos = '-8.5,{y}!'
                width = 2
                height = 2.55
                fillcolor = '#d7e9fe'
                style = 'rounded,filled'
            ]
        ",
            n = criteria$misclass_n,
            y = y
        )
        
        # Step 4 noverify
        misclass_noverify <- ""
        if (criteria$noverify_filter_nx > 0) {
            misclass_noverify <- glue(
                "{n} screened, {nx} not screened\\l",
                n = criteria$misclass_filter_n,
                nx = criteria$noverify_filter_nx
            )
        }
        
        # Step 4 misclass
        s4 <- glue("
            {s4}
            
            screen[
                label = 'Exclusion of Misclassifications and\\lDuplicates following Screening\\l{column} != {filter}\\l{noverify}(n = {n})\\l'
                pos = '-4.5,{y}!'
                width = 5
                height = 2.5
            ]
            screenx[
                label = '{misclass}'
                pos = '3,{y}!'
                width = 7.95
                height = 2.5
            ]
            screen -> screenx
            
            screen_top[
                style = invis
                pos = '{x},{y - 0.35}!'
            ]
            screen_bot[
                style = invis
                pos = '{x},{y - 2.75}!'
            ]
            screen_top -> screen_bot
        ",
            column = criteria$misclass_col,
            filter = str_replace_all(
                str_wrap(
                    paste0(
                        criteria$misclass_filter,
                        collapse = ", "
                    ),
                    width = 83
                ),
                "[\r\n]",
                "\\\\l"
            ),
            n = criteria$misclass_filter_n + criteria$noverify_filter_nx,
            noverify = misclass_noverify,
            misclass = paste0(
                "Misclassifications: ",
                criteria$misclass_filter_uniq_n[[1]],
                " (n = ",
                criteria$misclass_filter_uniq_n[[2]],
                ")\\l",
                collapse = ""
            ),
            y = y,
            x = x_edge,
            s4 = s4
        )
        
        # Step 5 noverify
        incl_noverify <- ""
        if (criteria$noverify_filter_nx > 0) {
            incl_noverify <- glue(
                "{n} verified, {nx} not verified\\l",
                n = criteria$incl_n,
                nx = criteria$noverify_filter_nx
            )
        }
        
        # Step 5 Inclusions
        y <- y - 2.75
        s5 <- glue("
            incl_title[
                label = <<b>Inclusions</b>>
                pos = '-8.5,{y}!'
                width = 2
                height = 1.9
                fillcolor = '#c8e29d'
                style = 'rounded,filled'
            ]
            incl[
                label = '{verified}Inclusions\\l{noverify}(n = {n})\\l'
                pos = '0,{y}!'
                width = 14
            ]
        ",
            verified = if (criteria$noverify_filter_nx > 0) "Verified and Non-verified " else "Verified ",
            noverify = incl_noverify,
            n = criteria$incl_n + criteria$noverify_filter_nx,
            y = y
        )
        
        # Combine steps
        out[[criteria$city]] <- paste0(
            "digraph {\n",
            diag_settings,
            "\n",
            s1,
            "\n",
            s2,
            "\n",
            s3,
            "\n",
            s4,
            "\n",
            s5,
            "\n",
            "}"
        )
    }
    
    # Return diagrams or single diagram if city is given
    out <- if (length(out) > 1) out else out[[1]]
    out <- if (out_render) grViz(out) else out
    return(out)
}

Function 6: prep_infra

Prepare Infrastructure Changes Data for Mapping.

#' Prepare Infrastructure Changes Data for Mapping
#'
#' This function prepares city data in a list format for mapping infrastructure changes since a target year.
#'
#' @param map_list A list of lists, where each list contains the following structure defining the city mapping data and settings:
#' \itemize{
#'  \item \code{title}: the title (char) of the main city map.
#'  \item \code{data}: the sf data.frame containing road segments of the install, upgrade1, and upgrade2 years and types (required).
#'  \item \code{downtown_bbox}: a vector (numeric) containing the coordinates of the downtown region's bounding box in xmin, ymin, xmax, and ymax respectively.
#' }
#' @param year_since The year (numeric) since to examine infrastructure changes.
#' 
#' @return A list of lists, where each list has keys and values from \code{map_list}, and the following additional keys:
#' \itemize{
#'  \item \code{data_map}: a sf data.frame with an additional `changes` column indicating the infrastructure changes since the target `year_since`.
#'  \item \code{data_bbox}: a sf data.frame of the bounding box of `data_map`.
#'  \item \code{data_downtown}: Same as `data_map` except for the downtown region indicated by `downtown_bbox`.
#'  \item \code{data_downtown_bbox}: a sf data.frame of the bounding box of `data_downtown`.
#'  \item \code{map_colors}: the colors (char) for each of the infrastructure change categories.
#'  \item \code{map_column}: the column name (char) to be mapped
#'  \item \code{downtown_title}: the name (char) of the downtown subset map
#' }
#' @export
#'
prep_infra <- function(
        map_list,
        year_since = settings$infra_changes_year
) {
    
    # Create color palette
    colors <- c("green", "orange", "gray")
    names(colors) <- c(
        glue("New Infrastructure Since Jan. {year}", year = year_since), # green
        glue("Upgraded Infrastructure Since Jan. {year}", year = year_since), # orange
        "Unchanged Infrastructure" # gray
    )
    
    # Generate maps per city
    out <- map_list
    for (i in 1:length(map_list)) {
        
        # Get city vars
        city <- map_list[[i]]
        
        # Create downtown title if not given
        if (!"downtown_title" %in% names(city)) {
            id <- names(map_list)[[i]]
            downtown_title <- glue(
                "Downtown {id}",
                id = str_to_title(id)
            )
        } else {
            downtown_title <- city$downtown_title
        }
        
        # Create col to identify infra changes
        map_data <- city$data %>%
            mutate(
                changes = case_when(
                    (
                        !is.na(verify_upgrade1_type) &
                        !is.na(verify_upgrade1_year) &
                        verify_upgrade1_year >= year_since
                    ) | (
                        !is.na(verify_upgrade2_type) &
                        !is.na(verify_upgrade2_year) &
                        verify_upgrade2_year >= year_since
                    ) ~ glue(
                        "Upgraded Infrastructure Since Jan. {year}",
                        year = year_since
                    ),
                    !is.na(verify_install_type) &
                    !is.na(verify_install_year) &
                    verify_install_year >= year_since ~
                    glue(
                        "New Infrastructure Since Jan. {year}",
                        year = year_since
                    ),
                    .default = "Unchanged Infrastructure"
                )
            )
        
        # Create bounding box for city
        city_bbox <- st_as_sfc(
            st_bbox(city$data, crs = 4326)
        )
        
        # Create bounding box for downtown region
        downtown_bbox <- st_as_sfc(
            st_bbox(city$downtown_bbox, crs = 4326)
        )
        
        # Subset data for downtown region
        submap_data <- map_data %>% st_crop(downtown_bbox)
        
        # Add prep data to cities list
        out[[i]]$data_map <- map_data
        out[[i]]$data_bbox <- city_bbox
        out[[i]]$data_downtown <- submap_data
        out[[i]]$data_downtown_bbox <- downtown_bbox
        out[[i]]$map_colors <- colors
        out[[i]]$map_column <- "changes"
        out[[i]]$downtown_title <- downtown_title
    }
    return(out)
}

Function 6a: map_infra

Maps Infrastructure Changes.

Creates maps of infrastructure changes since a certain year for each city and their downtown region using output from prep_map.

#' Map Infrastructure Changes
#'
#' This function maps infrastructure changes since a target year.
#'
#' @inheritParams prep_infra
#' 
#' @return A `patchwork` object of `ggplot` objects combined together to form the multiple maps in arranged on a layout.
#' @export
#'
map_infra <- function(
        map_list,
        year_since = settings$infra_changes_year
) {
    
    # Prepare data for maps
    cities_prep <- prep_infra(map_list)
        
    # Generate maps per city
    out <- list()
    for (i in 1:length(cities_prep)) {
        
        # Get city vars
        city <- cities_prep[[i]]
        id <- names(cities_prep)[[i]]
        
        # Create base map for city and downtown map
        base_map <- ggplot() +
            annotation_map_tile(
                zoomin = 1,
                type = "cartolight",
                cachedir = "../data/cache"
            ) +
            annotation_north_arrow(
                width = unit(0.2, "cm"),
                height = unit(0.5, "cm"),
                location = "br"
            ) +
            annotation_scale(
                location = "bl",
                style = "ticks"
            ) +
            scale_color_manual(values = city$map_colors) +
            fixed_plot_aspect(ratio = 1.5) +
            theme_void()
        
        # Generate city map
        out[[id]] <- base_map +
            ggtitle(city$title) +
            layer_spatial(city$data_map, aes(color = .data[[city$map_column]])) +
            layer_spatial(city$data_downtown_bbox, color = "red", fill = NA, linewidth = 0.5) +
            guides(colour = guide_legend(
                override.aes = list(linewidth = 3)
            ))
        
        # Generate downtown map
        out[[paste0(id, "_downtown")]] <- base_map +
            ggtitle(city$downtown_title) +
            layer_spatial(city$data_downtown, aes(color = .data[[city$map_column]])) +
            guides(color = "none")
    }
    
    # Combine maps into single layout
    out <- wrap_plots(out, ncol = 2) + 
        plot_layout(guides = "collect") &
        theme(
            legend.position = "bottom",
            legend.title = element_blank(),
            plot.title = element_text(
                size = 8,
                margin = margin(t = 8, b = -20, l = 8)
            ),
            plot.margin = margin(t = 8, l = 0, r = 0),
            panel.border = element_rect(
                colour = "gray20",
                fill = NA,
                linewidth = 0.5
            )
        )
    return(out)
}

Function 6b: map_infra_detail

Maps Infrastructure Changes in Detail.

Creates enlarged maps of infrastructure changes since a certain year for each city and their downtown region.

#' Map Infrastructure Changes in Detail
#'
#' This function creates enlarged maps of infrastructure changes since a target year.
#'
#' @inheritParams prep_infra
#' @param city_key They city key (char) to map from `map_list`. If `NULL`, maps all cities and returns a list, otherwise if given, returns an item from the list (required).
#' @param map_inset Set to `TRUE` to create an inset map of the downtown region or `FALSE` to omit the inset map.
#' @param map_inset_position A named vector (numeric) containing four values indicating the position of the inset map with the names being `left`, `bottom`, `right`, and `top` aligned to the `full` area. See \link[patchwork]{inset_element}.
#' 
#' @param map_ratio The aspect ratio (numeric) of the map.
#' @param map_inset_ratio The aspect ratio (numeric) of the subset map.
#' @return A list of `patchwork` object of `ggplot` objects combined together to form the enlarged maps, where the keys are the names of the cities as in `map_list`. If `city_key` is provided, returns only one of the items from this list.
#' @export
#'
map_infra_detail <- function(
        map_list,
        city_key = NULL,
        map_inset = TRUE,
        map_inset_position = c(
            left = 0.6,
            bottom = 0.6,
            right = 1,
            top = 1
        ),
        map_ratio = 1.75,
        map_inset_ratio = 2,
        year_since = settings$infra_changes_year,
        ...
) {
    
    # Only map one city if given
    if (!is.null(city_key)) {
        map_list <- list(map_list[[city_key]])
        names(map_list) <- city_key
    }
    
    # Prepare data for maps
    cities_prep <- prep_infra(map_list)
        
    # Generate enlarged maps per city
    out <- list()
    for (i in 1:length(cities_prep)) {
        
        # Get city vars
        city <- cities_prep[[i]]
        id <- names(cities_prep)[[i]]
        
        # Create base map for city and downtown map
        base_map <- ggplot() +
            annotation_map_tile(
                zoomin = 1,
                type = "cartolight",
                cachedir = "../data/cache"
            ) +
            scale_color_manual(values = city$map_colors) +
            theme_void()
        
        # Generate city map
        if ("map_ratio" %in% city) {
            map_ratio <- city$map_ratio
        }
        city_map <- base_map +
            fixed_plot_aspect(ratio = map_ratio) +
            annotation_north_arrow(
                width = unit(0.2, "cm"),
                height = unit(0.5, "cm"),
                location = "br"
            ) +
            annotation_scale(
                location = "bl",
                style = "ticks"
            ) +
            layer_spatial(city$data_map, aes(color = .data[[city$map_column]])) +
            guides(colour = guide_legend(
                override.aes = list(linewidth = 3)
            )) +
            theme(
                legend.position = "bottom",
                legend.title = element_blank(),
                panel.border = element_rect(
                    colour = "gray20",
                    fill = NA,
                    linewidth = 0.5
                )
            )
        
        # Add inset map as downtown region
        map_inset <- if ("map_inset" %in% names(city)) city$map_inset else map_inset
        if (map_inset) {
            
            # Generate downtown map
            if ("map_inset_ratio" %in% city) {
                map_inset_ratio <- city$map_inset_ratio
            }
            downtown_map <- base_map +
                fixed_plot_aspect(ratio = map_inset_ratio) +
                layer_spatial(city$data_downtown, aes(color = .data[[city$map_column]])) +
                guides(color = "none") +
                annotation_scale(
                    location = "tl",
                    style = "ticks"
                ) +
                theme(
                    panel.border = element_rect(
                        colour = "black",
                        fill = NA,
                        linewidth = 0.75
                    )
                )
            
            # Create final map with inset
            if ("map_inset_position" %in% names(city)) {
                map_inset_position <- city$map_inset_position
            }
            out[[id]] <- city_map + inset_element(
                downtown_map,
                left = map_inset_position[["left"]],
                bottom = map_inset_position[["bottom"]],
                right = map_inset_position[["right"]],
                top = map_inset_position[["top"]],
                align_to = "full"
            )
            
        } else {
            
            # No inset for final map
            out[[id]] <- city_map
        }
    }
    
    # Return list of all city maps or single map if city_key given
    if (!is.null(city_key)) {
        out <- out[[city_key]]
    }
    return(out)
}

Data

Load raw data provided by Konrad Samsel.

Vancouver Raw Data

vanc_raw <- read_sf("../data/raw/vancouver/Vancouver AS KS Mar26.shp")

Map

Note: Only segments with verified installations are shown (n = 745 of 3666).

tmap_mode("view")
tm_shape(vanc_raw %>% filter(!is.na(INST_TMIN))) +
    tm_lines(col = "INST_TMIN", popup.vars = TRUE)

Data

vanc_raw %>% as_tibble

Details

print(vanc_raw)
## Simple feature collection with 3666 features and 79 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: -123.2238 ymin: 49.19899 xmax: -123.0233 ymax: 49.31428
## Geodetic CRS:  WGS 84
## # A tibble: 3,666 × 80
##    object_id bike_route   street_nam bikeway_ty        subtype status street_seg
##    <chr>     <chr>        <chr>      <chr>             <chr>   <chr>  <chr>     
##  1 294725    Highbury     Highbury   Local Street      <NA>    Active Residenti…
##  2 294726    Highbury     Highbury   Local Street      <NA>    Active Residenti…
##  3 294731    Off-Broadway W 8th Ave  Local Street      <NA>    Active Residenti…
##  4 294732    Off-Broadway W 8th Ave  Local Street      <NA>    Active Residenti…
##  5 294733    Northern     Off Street Protected Bike L… OSS     Active Lane      
##  6 294736    Off-Broadway W 5th Ave  Local Street      <NA>    Active Residenti…
##  7 294737    Off-Broadway W 8th Ave  Local Street      <NA>    Active Residenti…
##  8 294738    Off-Broadway W 7th Ave  Local Street      <NA>    Active Residenti…
##  9 294739    Off-Broadway W 7th Ave  Local Street      <NA>    Active Residenti…
## 10 294742    Off-Broadway W 7th Ave  Local Street      <NA>    Active Residenti…
## # ℹ 3,656 more rows
## # ℹ 73 more variables: overall_di <chr>, bikeway_di <chr>, vehicle_di <chr>,
## #   speed_limi <chr>, surface_ty <chr>, aaa_networ <chr>, aaa_segmen <chr>,
## #   w_n_bound_ <chr>, e_s_bound_ <chr>, snow_remov <chr>, segment_le <dbl>,
## #   year_of_co <chr>, constructi <chr>, upgrade_ye <chr>, notes <chr>,
## #   OID_1 <dbl>, object_i_1 <dbl>, ID_DATAENT <dbl>, ID_ROUTE <chr>,
## #   CHECK_FLAG <chr>, EXCL_FLAG <chr>, EXCL_REAS <chr>, ENTRY_ORDE <dbl>, …

Files

Save vancouver-bikeways-raw-v1.geojson file:

# Save geojson
vanc_raw %>%
    write_sf("../data/vancouver-bikeways-raw-v1.geojson", delete_dsn = TRUE)

Save vancouver-bikeways-raw-v1.csv file:

# Save csv
# st_read("../data/vancouver-bikeways-raw-v1.csv", options = "GEOM_POSSIBLE_NAMES=geometry", crs = "urn:ogc:def:crs:OGC:1.3:CRS84")
vanc_raw %>%
    mutate(
        geometry = st_as_text(geometry),
        geometry_crs = st_crs(vanc_raw)$proj4string,
        .before = geometry
    ) %>%
    write_csv("../data/vancouver-bikeways-raw-v1.csv", na = "")

Calgary Raw Data


# Load raw data
calg_bikeways <- read_csv("../data/calgary_bikeways_2009_2022_v1.csv")
calg_roads <- read_csv("../data/calgary_roads_2009_2022_v1.csv")

# Combine raw data
calg <- calg_bikeways %>%
    select(
        SHAPE_ID,
        YEAR_ORIG,
        INST_YR,
        INST_MIN_HTYPE,
        UPGR1_YR,
        UPGR1_MIN_HTYPE,
        UPGR2_YR,
        UPGR2_MIN_HTYPE,
        ATR_SEGMENT_LENGTH
    ) %>%
    left_join(
        calg_roads %>% select(
            shape_id,
            ctp_class
        ),
        by = join_by(SHAPE_ID == shape_id)
    ) %>%
    rename(
        id = SHAPE_ID,
        install_year_orig = YEAR_ORIG,
        install_year = INST_YR,
        install_type = INST_MIN_HTYPE,
        upgrade1_year = UPGR1_YR,
        upgrade1_type = UPGR1_MIN_HTYPE,
        upgrade2_year = UPGR2_YR,
        upgrade2_type = UPGR2_MIN_HTYPE,
        segment_len = ATR_SEGMENT_LENGTH,
        segment_type = ctp_class
    ) %>%
    mutate(
        segment_len = segment_len / 1000,
        road_type = case_when( # create road types
            segment_type %in% c( # arterial equiv
                "Arterial Street",
                "Industrial Arterial",
                "Local Arterial",
                "Parkway",
                "Urban Boulevard"
            ) ~ "Arterial",
            segment_type %in% c( # collector equiv
                "Neighbourhood Boulevard",
                "Collector",
                "Primary Collector",
                "Skeletal Road"
            ) ~ "Collector",
            segment_type %in% c( # local equiv
                "Access Route",
                "Residential Street",
                "Activity Center Street",
                "Historic Road Allowance",
                "Lanes (Alleys)",
                "Industrial Street"
            ) ~ "Local",
            .default = segment_type
        )
    )
calg

Map

Note: Only segments with verified installations are shown (n = calg_raw %>% filter(!is.na(INST_TMIN)) %>% nrow of calg_raw %>% nrow).

# tmap_mode("view")
# tm_shape(calg_raw %>% filter(!is.na(INST_TMIN))) +
#   tm_lines(col = "INST_TMIN", popup.vars = TRUE)

Data

# calg_raw %>% as_tibble

Details

# print(calg_raw)

Files

Save calgary-bikeways-raw-v1.geojson file:

# Save geojson
# calg_raw %>%
#   write_sf("../data/calgary-bikeways-raw-v1.geojson", delete_dsn = TRUE)

Save calgary-bikeways-raw-v1.csv file:

# Save csv
# st_read("../data/calgary-bikeways-raw-v1.csv", options = "GEOM_POSSIBLE_NAMES=geometry", crs = "urn:ogc:def:crs:OGC:1.3:CRS84")
# calg_raw %>%
#   mutate(
#       geometry = st_as_text(geometry),
#       geometry_crs = st_crs(calg_raw)$proj4string,
#       .before = geometry
#   ) %>%
#   write_csv("../data/calgary-bikeways-raw-v1.csv", na = "")

Toronto Raw Data

toron_raw <- read_sf("../data/raw/toronto/Toronto AS 1323 V3.shp")

Map

Note: Only segments with verified installations are shown (n = 331 of 1323).

tmap_mode("view")
tm_shape(toron_raw %>% filter(!is.na(INST_TMIN))) +
    tm_lines(col = "INST_TMIN", popup.vars = TRUE)

Data

toron_raw %>% as_tibble

Details

print(toron_raw)
## Simple feature collection with 1323 features and 88 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: -79.63039 ymin: 43.58221 xmax: -79.11803 ymax: 43.85546
## Geodetic CRS:  WGS 84
## # A tibble: 1,323 × 89
##    `_id1` OBJECTI2 SEGMENT3 INSTALL4 UPGRADE5 PRE_AMA6 STREET_7         FROM_ST8
##     <dbl>    <dbl>    <dbl>    <dbl>    <dbl> <chr>    <chr>            <chr>   
##  1      8        8        8     2001     2021 <NA>     Bloor St E       Parliam…
##  2     17       17       17     2001     2015 <NA>     Lake Shore Blvd… Humber …
##  3     18       18       18     2001     2015 <NA>     Lake Shore Blvd… 37 M E …
##  4     19       19       19     2001     2015 <NA>     Lake Shore Blvd… 50.7 M …
##  5     38       38       38     2001        0 <NA>     Queens Quay W    Martin …
##  6     39       39       39     2001     2016 <NA>     Davenport Rd     Cotting…
##  7     40       40       40     2001     2016 <NA>     Elizabeth St     College…
##  8     41       41       41     2001        0 <NA>     Gerrard St E     Yonge St
##  9     42       42       42     2001     2016 <NA>     Macpherson Ave   Davenpo…
## 10     43       43       43     2001     2016 <NA>     Lake Shore Blvd… Marine …
## # ℹ 1,313 more rows
## # ℹ 81 more variables: TO_STRE9 <chr>, ROADCLA10 <chr>, CNPCLAS11 <chr>,
## #   SURFACE12 <chr>, OWNER13 <chr>, DIR_LOW14 <chr>, INFRA_L15 <chr>,
## #   SEPA_LO16 <chr>, SEPB_LO17 <chr>, ORIG_LO18 <chr>, DIR_HIG19 <chr>,
## #   INFRA_H20 <chr>, SEPA_HI21 <chr>, SEPB_HI22 <chr>, ORIG_HI23 <chr>,
## #   BYLAWED24 <chr>, EDITOR25 <chr>, LAST_ED26 <chr>, UPGRADE27 <chr>,
## #   CONVERT28 <chr>, OBJ2 <dbl>, ID_SEAN <chr>, C_INST_YR <dbl>, …

Files

Save toronto-bikeways-raw-v1.geojson file:

# Save geojson
toron_raw %>%
    write_sf("../data/toronto-bikeways-raw-v1.geojson", delete_dsn = TRUE)

Save toronto-bikeways-raw-v1.csv file:

# Save csv
# st_read("../data/toronto-bikeways-raw-v1.csv", options = "GEOM_POSSIBLE_NAMES=geometry", crs = "urn:ogc:def:crs:OGC:1.3:CRS84")
toron_raw %>%
    mutate(
        geometry = st_as_text(geometry),
        geometry_crs = st_crs(toron_raw)$proj4string,
        .before = geometry
    ) %>%
    write_csv("../data/toronto-bikeways-raw-v1.csv", na = "")

Preprocessing

Vancouver Preprocessed Data

# Preprocess data
vanc_preprocess <- vanc_raw %>%
    select( # select and rename
        id = object_id,
        street = street_nam,
        status = status,
        road_type = street_seg,
        install_year = year_of_co,
        install_type = bikeway_ty,
        verify_install_year = INST_YR,
        verify_install_date = INST_DATE,
        verify_install_type = INST_TMIN,
        verify_install_comment = INST_COMM,
        verify_upgrade1_year = UPGR1_YR,
        verify_upgrade1_date = UPGR1_DATE,
        verify_upgrade1_type = UPGR1_TMIN,
        verify_upgrade1_comment = UPGR1_COMM,
        verify_upgrade2_year = UPGR2_YR,
        verify_upgrade2_date = UPGR2_DATE,
        verify_upgrade2_type = UPGR2_TMIN,
        verify_upgrade2_comment = UPGR2_COMM,
        verify_misclass = EXCL_REAS
    ) %>%
    mutate( # data types
        id = as.character(id),
        street = as.character(street),
        road_type = as.character(road_type),
        install_year = as.numeric(install_year),
        install_type = as.character(install_type),
        verify_install_year = as.numeric(verify_install_year),
        verify_install_date = as.character(verify_install_date),
        verify_install_type = as.character(verify_install_type),
        verify_install_comment = as.character(verify_install_comment),
        verify_upgrade1_year = as.numeric(verify_upgrade1_year),
        verify_upgrade1_date = as.character(verify_upgrade1_date),
        verify_upgrade1_type = as.character(verify_upgrade1_type),
        verify_upgrade1_comment = as.character(verify_upgrade1_comment),
        verify_upgrade2_year = as.numeric(verify_upgrade2_year),
        verify_upgrade2_date = as.character(verify_upgrade2_date),
        verify_upgrade2_type = as.character(verify_upgrade2_type),
        verify_upgrade2_comment = as.character(verify_upgrade2_comment),
        verify_misclass = as.character(verify_misclass)
    ) %>%
    mutate( # clean values
        install_year = na_if(install_year, 0),
        verify_install_year = na_if(verify_install_year, 0),
        verify_install_date = na_if(verify_install_date, "NA"),
        verify_install_type = na_if(verify_install_type, "NA") %>%
            str_replace_all("[^[:alpha:]]|\\s", ""),
        verify_install_comment = na_if(verify_install_comment, "NA"),
        verify_upgrade1_year = na_if(verify_upgrade1_year, 0),
        verify_upgrade1_date = na_if(verify_upgrade1_date, "NA"),
        verify_upgrade1_type = na_if(verify_upgrade1_type, "NA") %>%
            str_replace_all("[^[:alpha:]]|\\s", ""),
        verify_upgrade1_comment = na_if(verify_upgrade1_comment, "NA"),
        verify_upgrade2_year = na_if(verify_upgrade2_year, 0),
        verify_upgrade2_date = na_if(verify_upgrade2_date, "NA"),
        verify_upgrade2_type = na_if(verify_upgrade2_type, "NA") %>%
            str_replace_all("[^[:alpha:]]|\\s", ""),
        verify_upgrade2_comment = na_if(verify_upgrade2_comment, "NA"),
        verify_misclass =  na_if(verify_misclass, "NA") %>%
            str_trim %>%
            str_to_title
    ) %>%
    mutate( # add column for non-verified infra types
        no_verify_install_type = if_else(
            is.na(verify_install_type) & install_type == "Local Street",
            "Local Street",
            NA
        ),
        .after = verify_misclass
    ) %>%
    mutate( # add local street as non-verified LSB
        verify_install_type = if_else(
            is.na(verify_install_type) & install_type == "Local Street",
            "LSB",
            verify_install_type
        ),
        verify_install_year = if_else(
            is.na(verify_install_year) & install_type == "Local Street",
            install_year,
            verify_install_year
        )
    ) %>%
    mutate( # create col for recoded road types
        road_type_recode = case_when( # create road types
            road_type %in% c( # arterial equiv
                "Arterial"
            ) ~ "Arterial",
            road_type %in% c( # collector equiv
                "Collector",
                "Secondary Arterial",
                "Sec Arterial"
            ) ~ "Collector",
            road_type %in% c( # local equiv
                "Lane",
                "Residential",
                "Leased",
                "Recreational"
            ) ~ "Local",
            .default = road_type
        ),
        .after = road_type
    ) %>%
    mutate( # create col for segment lengths in km
        geometry_len_km = as.numeric(st_length(geometry)) / 1000,
        .before = geometry
    )

Map

Note: Only the first 1000 records are shown as a sample.

tmap_mode("view")
tm_shape(
    vanc_preprocess %>% head(1000)) +
    tm_lines(col = "verify_install_type", popup.vars = TRUE)

Data

vanc_preprocess %>% as_tibble

Details

print(vanc_preprocess)
## Simple feature collection with 3666 features and 22 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: -123.2238 ymin: 49.19899 xmax: -123.0233 ymax: 49.31428
## Geodetic CRS:  WGS 84
## # A tibble: 3,666 × 23
##    id     street     status road_type road_type_recode install_year install_type
##  * <chr>  <chr>      <chr>  <chr>     <chr>                   <dbl> <chr>       
##  1 294725 Highbury   Active Resident… Local                    2006 Local Street
##  2 294726 Highbury   Active Resident… Local                    2006 Local Street
##  3 294731 W 8th Ave  Active Resident… Local                    1994 Local Street
##  4 294732 W 8th Ave  Active Resident… Local                    1994 Local Street
##  5 294733 Off Street Active Lane      Local                    2003 Protected B…
##  6 294736 W 5th Ave  Active Resident… Local                    2009 Local Street
##  7 294737 W 8th Ave  Active Resident… Local                    1994 Local Street
##  8 294738 W 7th Ave  Active Resident… Local                    1994 Local Street
##  9 294739 W 7th Ave  Active Resident… Local                    1994 Local Street
## 10 294742 W 7th Ave  Active Resident… Local                    1994 Local Street
## # ℹ 3,656 more rows
## # ℹ 16 more variables: verify_install_year <dbl>, verify_install_date <chr>,
## #   verify_install_type <chr>, verify_install_comment <chr>,
## #   verify_upgrade1_year <dbl>, verify_upgrade1_date <chr>,
## #   verify_upgrade1_type <chr>, verify_upgrade1_comment <chr>,
## #   verify_upgrade2_year <dbl>, verify_upgrade2_date <chr>,
## #   verify_upgrade2_type <chr>, verify_upgrade2_comment <chr>, …

Files

Save vancouver-bikeways-preprocess-v1.geojson file:

# Save geojson
vanc_preprocess %>%
    write_sf("../data/vancouver-bikeways-preprocess-v1.geojson", delete_dsn = TRUE)

Save vancouver-bikeways-preprocess-v1.csv file:

# Save csv
# st_read("../data/vancouver-bikeways-preprocess-v1.csv", options = "GEOM_POSSIBLE_NAMES=geometry", crs = "urn:ogc:def:crs:OGC:1.3:CRS84")
vanc_preprocess %>%
    mutate(
        geometry = st_as_text(geometry),
        geometry_crs = st_crs(vanc_preprocess)$proj4string,
        .before = geometry
    ) %>%
    write_csv("../data/vancouver-bikeways-preprocess-v1.csv", na = "")

Calgary Preprocessed Data

# Preprocess data
# calg_preprocess <- calg_raw %>%
#   select( # select and rename
#       id = object_id,
#       street = street_nam,
#       status = status,
#       road_type = street_seg,
#       #install_year = INSTALL4,
#       install_type = bikeway_ty,
#       verify_install_year = INST_YR,
#       verify_install_date = INST_DATE,
#       verify_install_type = INST_TMIN,
#       verify_install_comment = INST_COMM,
#       verify_upgrade1_year = UPGR1_YR,
#       verify_upgrade1_date = UPGR1_DATE,
#       verify_upgrade1_type = UPGR1_TMIN,
#       verify_upgrade1_comment = UPGR1_COMM,
#       verify_upgrade2_year = UPGR2_YR,
#       verify_upgrade2_date = UPGR2_DATE,
#       verify_upgrade2_type = UPGR2_TMIN,
#       verify_upgrade2_comment = UPGR2_COMM,
#       verify_misclass = EXCL_REAS
#   ) %>%
#   mutate( # data types
#       id = as.character(id),
#       street = as.character(street),
#       road_type = as.character(road_type),
#       #install_year = as.numeric(install_year),
#       install_type = as.character(install_type),
#       verify_install_year = as.numeric(verify_install_year),
#       verify_install_date = as.character(verify_install_date),
#       verify_install_type = as.character(verify_install_type),
#       verify_install_comment = as.character(verify_install_comment),
#       verify_upgrade1_year = as.numeric(verify_upgrade1_year),
#       verify_upgrade1_date = as.character(verify_upgrade1_date),
#       verify_upgrade1_type = as.character(verify_upgrade1_type),
#       verify_upgrade1_comment = as.character(verify_upgrade1_comment),
#       verify_upgrade2_year = as.numeric(verify_upgrade2_year),
#       verify_upgrade2_date = as.character(verify_upgrade2_date),
#       verify_upgrade2_type = as.character(verify_upgrade2_type),
#       verify_upgrade2_comment = as.character(verify_upgrade2_comment),
#       verify_misclass = as.character(verify_misclass)
#   ) %>%
#   mutate( # clean values
#       #install_year = na_if(install_year, 0),
#       verify_install_year = na_if(verify_install_year, 0),
#       verify_install_date = na_if(verify_install_date, "NA"),
#       verify_install_type = na_if(verify_install_type, "NA") %>%
#           str_replace_all("[^[:alpha:]]|\\s", ""),
#       verify_install_comment = na_if(verify_install_comment, "NA"),
#       verify_upgrade1_year = na_if(verify_upgrade1_year, 0),
#       verify_upgrade1_date = na_if(verify_upgrade1_date, "NA"),
#       verify_upgrade1_type = na_if(verify_upgrade1_type, "NA") %>%
#           str_replace_all("[^[:alpha:]]|\\s", ""),
#       verify_upgrade1_comment = na_if(verify_upgrade1_comment, "NA"),
#       verify_upgrade2_year = na_if(verify_upgrade2_year, 0),
#       verify_upgrade2_date = na_if(verify_upgrade2_date, "NA"),
#       verify_upgrade2_type = na_if(verify_upgrade2_type, "NA") %>%
#           str_replace_all("[^[:alpha:]]|\\s", ""),
#       verify_upgrade2_comment = na_if(verify_upgrade2_comment, "NA"),
#       verify_misclass =  na_if(verify_misclass, "NA") %>%
#           str_trim %>%
#           str_to_title
#   ) %>%
#   mutate( # add column for non-verified infra types
#       no_verify_install_type = if_else(
#           is.na(verify_install_type) & install_type == "Local Street",
#           "Local Street",
#           NA
#       ),
#       .after = verify_misclass
#   ) %>%
#   mutate( # add local street as non-verified LSB
#       verify_install_type = if_else(
#           is.na(verify_install_type) & install_type == "Local Street",
#           "LSB",
#           verify_install_type
#       )
#       # verify_install_year = if_else(
#       #   is.na(verify_install_type) & install_type == "Local Street",
#       #   install_year,
#       #   verify_install_year
#       # )
#   ) %>%
#   mutate( # create col for recoded road types
#       road_type_recode = case_when( # create road types
#           road_type %in% c( # arterial equiv
#               "Arterial"
#           ) ~ "Arterial",
#           road_type %in% c( # collector equiv
#               "Collector",
#               "Secondary Arterial",
#               "Sec Arterial"
#           ) ~ "Collector",
#           road_type %in% c( # local equiv
#               "Lane",
#               "Residential",
#               "Leased",
#               "Recreational"
#           ) ~ "Local",
#           .default = road_type
#       ),
#       .after = road_type
#   ) %>%
#   mutate( # create col for segment lengths in km
#       geometry_len_km = as.numeric(st_length(geometry)) / 1000,
#       .before = geometry
#   )

Map

Note: Only the first 1000 records are shown as a sample.

# tmap_mode("view")
# tm_shape(calg_preprocess %>% head(1000)) +
#   tm_lines(col = "verify_install_type", popup.vars = TRUE)

Data

# calg_preprocess %>% as_tibble

Details

# print(calg_preprocess)

Files

Save calgary-bikeways-preprocess-v1.geojson file:

# Save geojson
# calg_preprocess %>%
#   write_sf("../data/calgary-bikeways-preprocess-v1.geojson", delete_dsn = TRUE)

Save calgary-bikeways-preprocess-v1.csv file:

# Save csv
# st_read("../data/calgary-bikeways-preprocess-v1.csv", options = "GEOM_POSSIBLE_NAMES=geometry", crs = "urn:ogc:def:crs:OGC:1.3:CRS84")
# calg_preprocess %>%
#   mutate(
#       geometry = st_as_text(geometry),
#       geometry_crs = st_crs(calg_preprocess)$proj4string,
#       .before = geometry
#   ) %>%
#   write_csv("../data/calgary-bikeways-preprocess-v1.csv", na = "")

Toronto Preprocessed Data

# Preprocess data
toron_preprocess <- toron_raw %>%
    select( # select and rename
        id = OBJECTI2,
        street = STREET_7,
        street_from = FROM_ST8,
        street_to = TO_STRE9,
        road_type = M_FEATUR36,
        install_year = C_INST_YR,
        install_type = INFRA_H20,
        verify_install_year = INST_YR,
        verify_install_date = INST_DATE,
        verify_install_type = INST_TMIN,
        verify_install_comment = INST_COMM,
        verify_upgrade1_year = UPGR1_YR,
        verify_upgrade1_date = UPGR1_DATE,
        verify_upgrade1_type = UPGR1_TMIN,
        verify_upgrade1_comment = UPGR1_COMM,
        verify_upgrade2_year = UPGR2_YR,
        verify_upgrade2_date = UPGR2_DATE,
        verify_upgrade2_type = UPGR2_TMIN,
        verify_upgrade2_comment = UPGR2_COMM,
        verify_misclass = EXCL_REAS
    ) %>%
    mutate( # data types
        id = as.character(id),
        street = as.character(street),
        street_from = as.character(street_from),
        street_to = as.character(street_to),
        road_type = as.character(road_type),
        install_year = as.numeric(install_year),
        install_type = as.character(install_type),
        verify_install_year = as.numeric(verify_install_year),
        verify_install_date = as.character(verify_install_date),
        verify_install_type = as.character(verify_install_type),
        verify_install_comment = as.character(verify_install_comment),
        verify_upgrade1_year = as.numeric(verify_upgrade1_year),
        verify_upgrade1_date = as.character(verify_upgrade1_date),
        verify_upgrade1_type = as.character(verify_upgrade1_type),
        verify_upgrade1_comment = as.character(verify_upgrade1_comment),
        verify_upgrade2_year = as.numeric(verify_upgrade2_year),
        verify_upgrade2_date = as.character(verify_upgrade2_date),
        verify_upgrade2_type = as.character(verify_upgrade2_type),
        verify_upgrade2_comment = as.character(verify_upgrade2_comment),
        verify_misclass = as.character(verify_misclass)
    ) %>%
    mutate( # clean values
        install_year = na_if(install_year, 0),
        verify_install_year = na_if(verify_install_year, 0),
        verify_install_date = na_if(verify_install_date, "NA"),
        verify_install_type = na_if(verify_install_type, "NA") %>%
            str_replace_all("[^[:alpha:]]|\\s", ""),
        verify_install_comment = na_if(verify_install_comment, "NA"),
        verify_upgrade1_year = na_if(verify_upgrade1_year, 0),
        verify_upgrade1_date = na_if(verify_upgrade1_date, "NA"),
        verify_upgrade1_type = na_if(verify_upgrade1_type, "NA") %>%
            str_replace_all("[^[:alpha:]]|\\s", ""),
        verify_upgrade1_comment = na_if(verify_upgrade1_comment, "NA"),
        verify_upgrade2_year = na_if(verify_upgrade2_year, 0),
        verify_upgrade2_date = na_if(verify_upgrade2_date, "NA"),
        verify_upgrade2_type = na_if(verify_upgrade2_type, "NA") %>%
            str_replace_all("[^[:alpha:]]|\\s", ""),
        verify_upgrade2_comment = na_if(verify_upgrade2_comment, "NA"),
        verify_misclass =  na_if(verify_misclass, "NA") %>%
            str_trim %>%
            str_to_title
    ) %>%
    mutate( # create col for recoded road types
        road_type_recode = case_when( # create road types
            road_type %in% c( # arterial equiv
                "Major Arterial",
                "Major Arterial Ramp",
                "Minor Arterial"
            ) ~ "Arterial",
            road_type %in% c( # collector equiv
                "Collector"
            ) ~ "Collector",
            road_type %in% c(  # local equiv
                "Local",
                "Other"
            ) ~ "Local",
            .default = road_type
        ),
        .after = road_type
    ) %>%
    mutate( # create col for segment lengths in km
        geometry_len_km = as.numeric(st_length(geometry)) / 1000,
        .before = geometry
    )

Map

Note: Only the first 1000 records are shown as a sample.

tmap_mode("view")
tm_shape(toron_preprocess %>% head(1000)) +
    tm_lines(col = "verify_install_type", popup.vars = TRUE)

Data

toron_preprocess %>% as_tibble

Details

print(toron_preprocess)
## Simple feature collection with 1323 features and 22 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: -79.63039 ymin: 43.58221 xmax: -79.11803 ymax: 43.85546
## Geodetic CRS:  WGS 84
## # A tibble: 1,323 × 23
##    id    street    street_from street_to road_type road_type_recode install_year
##  * <chr> <chr>     <chr>       <chr>     <chr>     <chr>                   <dbl>
##  1 8     Bloor St… Parliament… Castle F… Major Ar… Arterial                 2001
##  2 17    Lake Sho… Humber Bay… Humber B… Major Ar… Arterial                 2001
##  3 18    Lake Sho… 37 M E Fle… Humber B… Major Ar… Arterial                 2001
##  4 19    Lake Sho… 50.7 M E L… 37 M E F… Major Ar… Arterial                 2001
##  5 38    Queens Q… Martin Goo… Bathurst… Collector Collector                2001
##  6 39    Davenpor… Cottingham… Macphers… Minor Ar… Arterial                 2001
##  7 40    Elizabet… College St  Gerrard … Collector Collector                2001
##  8 41    Gerrard … Yonge St    Church St Minor Ar… Arterial                 2001
##  9 42    Macphers… Davenport … Poplar P… Collector Collector                2001
## 10 43    Lake Sho… Marine Par… Palace P… Major Ar… Arterial                 2001
## # ℹ 1,313 more rows
## # ℹ 16 more variables: install_type <chr>, verify_install_year <dbl>,
## #   verify_install_date <chr>, verify_install_type <chr>,
## #   verify_install_comment <chr>, verify_upgrade1_year <dbl>,
## #   verify_upgrade1_date <chr>, verify_upgrade1_type <chr>,
## #   verify_upgrade1_comment <chr>, verify_upgrade2_year <dbl>,
## #   verify_upgrade2_date <chr>, verify_upgrade2_type <chr>, …

Files

Save toronto-bikeways-preprocess-v1.geojson file:

# Save geojson
toron_preprocess %>%
    write_sf("../data/toronto-bikeways-preprocess-v1.geojson", delete_dsn = TRUE)

Save toronto-bikeways-preprocess-v1.csv file:

# Save csv
# st_read("../data/toronto-bikeways-preprocess-v1.csv", options = "GEOM_POSSIBLE_NAMES=geometry", crs = "urn:ogc:def:crs:OGC:1.3:CRS84")
toron_preprocess %>%
    mutate(
        geometry = st_as_text(geometry),
        geometry_crs = st_crs(toron_preprocess)$proj4string,
        .before = geometry
    ) %>%
    write_csv("../data/toronto-bikeways-preprocess-v1.csv", na = "")

Inclusion and Exclusion Criteria

Apply filters for inclusion and exclusion criteria using function filter_criteria as described in the methods and Appendix 2.

# Build filter criteria
cities_criteria <- list(
    vancouver = list(
        city = "vancouver",
        data = vanc_preprocess,
        data_date = "January 2023",
        data_url = "https://opendata.vancouver.ca/explore/dataset/bikeways/information",
        infra_col = "install_type",
        infra_filter = c("Painted Lanes", "Protected Bike Lanes", "Local Street"),
        road_col = "road_type",
        road_filter = c("Off-street"),
        geom_col = "geometry",
        geom_unit = "km",
        geom_filter = TRUE,
        misclass_col = "verify_misclass",
        misclass_filter = c(NA, "NA"),
        noverify_col = "no_verify_install_type",
        noverify_filter = c("Local Street")
    ),
    # list(
    #   city = "calgary",
    #   data = calg_preprocess,
    #   data_date = "January 2023",
    #   data_url = "https://data.calgary.ca/Transportation-Transit/Calgary-Bikeways/jjqk-9b73",
    #   infra_col = "install_type",
    #   infra_filter = c("Bicycle Lane", "Cycle Track"),
    #   status_col = "install_status",
    #   status_filter = c("INACTIVE", "PLANNED"),
    #   geom_col = "geometry",
    #   geom_unit = "km",
    #   geom_filter = TRUE,
    #   misclass_col = "verify_misclass",
    #   misclass_filter = c(NA, "NA")
    # ),
    toronto = list(
        city = "toronto",
        data = toron_preprocess,
        data_date = "January 2023",
        data_url = "https://open.toronto.ca/dataset/cycling-network",
        infra_col = "install_type",
        infra_filter = c("Bi-Directional Cycle Track", "Bike Lane", "Bike Lane - Buffered", "Bike Lane - Contraflow", "Cycle Track", "Cycle Track - Contraflow"),
        geom_col = "geometry",
        geom_unit = "km",
        geom_filter = TRUE,
        misclass_col = "verify_misclass",
        misclass_filter = c(NA, "NA")
    )
)

# Apply filter criteria for all cities
criteria_data <- filter_criteria(cities_criteria)

Vancouver Filtered Data

vanc <- criteria_data$vancouver$data_filter

Map

Note: Only the first 1000 records are shown as a sample.

tmap_mode("view")
tm_shape(vanc %>% head(1000)) +
    tm_lines(col = "verify_install_type", popup.vars = TRUE)

Data

vanc %>% as_tibble

Details

print(vanc)
## Simple feature collection with 3118 features and 22 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: -123.2196 ymin: 49.20424 xmax: -123.0234 ymax: 49.31428
## Geodetic CRS:  WGS 84
## # A tibble: 3,118 × 23
##    id     street     status road_type road_type_recode install_year install_type
##  * <chr>  <chr>      <chr>  <chr>     <chr>                   <dbl> <chr>       
##  1 294725 Highbury   Active Resident… Local                    2006 Local Street
##  2 294726 Highbury   Active Resident… Local                    2006 Local Street
##  3 294731 W 8th Ave  Active Resident… Local                    1994 Local Street
##  4 294732 W 8th Ave  Active Resident… Local                    1994 Local Street
##  5 294736 W 5th Ave  Active Resident… Local                    2009 Local Street
##  6 294737 W 8th Ave  Active Resident… Local                    1994 Local Street
##  7 294738 W 7th Ave  Active Resident… Local                    1994 Local Street
##  8 294739 W 7th Ave  Active Resident… Local                    1994 Local Street
##  9 294742 W 7th Ave  Active Resident… Local                    1994 Local Street
## 10 294746 SW Marine… Active Resident… Local                    1997 Painted Lan…
## # ℹ 3,108 more rows
## # ℹ 16 more variables: verify_install_year <dbl>, verify_install_date <chr>,
## #   verify_install_type <chr>, verify_install_comment <chr>,
## #   verify_upgrade1_year <dbl>, verify_upgrade1_date <chr>,
## #   verify_upgrade1_type <chr>, verify_upgrade1_comment <chr>,
## #   verify_upgrade2_year <dbl>, verify_upgrade2_date <chr>,
## #   verify_upgrade2_type <chr>, verify_upgrade2_comment <chr>, …

Calgary Filtered Data

#calg<- criteria_data$calgary$data_filter

Map

Note: Only the first 1000 records are shown as a sample.

#tmap_mode("view")
#tm_shape(calg %>% head(1000)) +
#   tm_lines(col = "verify_install_type", popup.vars = TRUE)

Data

#calg %>% as_tibble

Details

#print(calg)

Toronto Filtered Data

toron <- criteria_data$toronto$data_filter

Map

Note: Only the first 1000 records are shown as a sample.

tmap_mode("view")
tm_shape(toron %>% head(1000)) +
    tm_lines(col = "verify_install_type", popup.vars = TRUE)

Data

toron %>% as_tibble

Details

print(toron)
## Simple feature collection with 326 features and 22 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: -79.58768 ymin: 43.5923 xmax: -79.12199 ymax: 43.85546
## Geodetic CRS:  WGS 84
## # A tibble: 326 × 23
##    id    street    street_from street_to road_type road_type_recode install_year
##  * <chr> <chr>     <chr>       <chr>     <chr>     <chr>                   <dbl>
##  1 8     Bloor St… Parliament… Castle F… Major Ar… Arterial                 2001
##  2 17    Lake Sho… Humber Bay… Humber B… Major Ar… Arterial                 2001
##  3 18    Lake Sho… 37 M E Fle… Humber B… Major Ar… Arterial                 2001
##  4 19    Lake Sho… 50.7 M E L… 37 M E F… Major Ar… Arterial                 2001
##  5 38    Queens Q… Martin Goo… Bathurst… Collector Collector                2001
##  6 39    Davenpor… Cottingham… Macphers… Minor Ar… Arterial                 2001
##  7 40    Elizabet… College St  Gerrard … Collector Collector                2001
##  8 41    Gerrard … Yonge St    Church St Minor Ar… Arterial                 2001
##  9 42    Macphers… Davenport … Poplar P… Collector Collector                2001
## 10 43    Lake Sho… Marine Par… Palace P… Major Ar… Arterial                 2001
## # ℹ 316 more rows
## # ℹ 16 more variables: install_type <chr>, verify_install_year <dbl>,
## #   verify_install_date <chr>, verify_install_type <chr>,
## #   verify_install_comment <chr>, verify_upgrade1_year <dbl>,
## #   verify_upgrade1_date <chr>, verify_upgrade1_type <chr>,
## #   verify_upgrade1_comment <chr>, verify_upgrade2_year <dbl>,
## #   verify_upgrade2_date <chr>, verify_upgrade2_type <chr>, …

Map Data Preparation

Prepare data and settings for figures with maps.

map_data <- list(
    vancouver = list(
        title = "Vancouver, CA",
        data = vanc,
        downtown_bbox = c(
            xmin = -123.143450,
            ymin = 49.269529,
            xmax = -123.095584,
            ymax = 49.296229
        )
    ),
    # calgary = list(
    #   title = "Calgary, CA",
    #   data = calg,
    #   downtown_bbox = c(
    #       xmin = -114.127909,
    #       ymin = 51.006626,
    #       xmax = -113.975817,
    #       ymax = 51.081312,
    #   )
    # ),
    toronto = list(
        title = "Toronto, CA",
        data = toron,
        downtown_bbox = c(
            xmin = -79.300395,
            ymin = 43.636621,
            xmax = -79.489565,
            ymax = 43.698150
        )
    )
)

Figures

Figure 1: Flow diagram of inclusion criteria for bikeway segments in Vancouver, Calgary, and Toronto.

This flowchart provides a high-level overview of the segment inclusions and exclusions for each municipality. Data from Calgary were specific to on-street routes only. For detailed flow diagrams specific to each municipality, please refer to the Appendix.

fig1 <- diag_criteria(criteria_data)

Figure

fig1

File

fig1 %>%
    export_svg %>%
    charToRaw %>%
    rsvg_pdf("fig-methods.pdf")

Figure 2: Changes in dedicated cycling infrastructure between 2009 and 2022 for Vancouver, Calgary, and Toronto by infrastructure category.

Assessed using roadway centreline-km, with infrastructure classifications determined by the most protective element present along each road segment.

plot_yearly_len_infra(list(
    "Vancouver, CA" = vanc,
    #"Calgary, CA" = calg %>% as_tibble,
    "Toronto, CA" = toron
))

Figure 4: Changes in Dedicated On-Street Infrastructure Since January 2020 for Vancouver, Calgary, and Toronto.

New installations of dedicated infrastructure are denoted in green, upgrades from a previous dedicated infrastructure type are denoted in orange. Basemap from OpenStreetMap and Carto (Positron).

map_infra(map_data) %>% print

Appendix 1 - Supplementary Results

Supplementary Figure 1: Enlarged Map. Changes in Dedicated On-Street Infrastructure Between 2020-2021 for the Municipality of Vancouver, CA.

New installations of dedicated infrastructure are denoted in green, upgrades from a previous dedicated infrastructure type are denoted in orange. Basemap from OpenStreetMap and Carto (Positron).

map_infra_detail(
    map_data,
    "vancouver",
    map_inset_position = c(
        left = -0.85,
        bottom = 0.65,
        right = 1.2125,
        top = 0.99
    ),
    map_ratio = 1.25,
    map_inset_ratio = 1.2
) %>% print

Supplementary Figure 2: Enlarged Map. Changes in Dedicated On-Street Infrastructure Between 2020-2022 for the Municipality of Calgary, CA.

New installations of dedicated infrastructure are denoted in green, upgrades of dedicated infrastructure are denoted in orange. Basemap from OpenStreetMap and Carto (Positron).

# map_infra_detail(
#     map_data,
#     "calgary",
#     map_inset_position = c(
#         left = -0.85,
#         bottom = 0.65,
#         right = 1.2125,
#         top = 0.99
#     ),
#     map_ratio = 1.25,
#     map_inset_ratio = 1.2
# ) %>% print

Supplementary Figure 3: Enlarged Map. Changes in Dedicated On-Street Infrastructure Between 2020-2022 for the Municipality of Toronto, CA.

New installations of dedicated infrastructure are denoted in green, upgrades of dedicated infrastructure are denoted in orange. Basemap from OpenStreetMap and Carto (Positron).

map_infra_detail(
    map_data,
    "toronto",
    map_inset_position = c(
        left = -0.85,
        bottom = 0.65,
        right = 1.38,
        top = 0.99
    ),
    map_ratio = 1.75,
    map_inset_ratio = 2.5
) %>% print

Supplementary Figure 4: Changes in dedicated cycling infrastructure between 2009 and 2021 for the Municipality of Vancouver, CA.

By (A) roadway classification, and (B) infrastructure distribution within each road class. Assessed using roadway centreline-km, with infrastructure classification determined by the most protective element present along each road segment.

plot_yearly_len_road(
    vanc,
    title = "Roadways with Dedicated Cycling Infrastructure (Vancouver, CA)"
)

Supplementary Figure 5: Changes in dedicated cycling infrastructure between 2009 and 2022 for the Municipality of Calgary, CA.

By (A) roadway classification, and (B) infrastructure distribution within each road class. Assessed using roadway centreline-km, with infrastructure classification determined by the most protective element present along each road segment.

# plot_yearly_len_road(
#   calg,
#   title = "Roadways with Dedicated Cycling Infrastructure (Calgary, CA)"
# )

Supplementary Figure 6: Changes in dedicated cycling infrastructure between 2009 and 2022 for the Municipality of Toronto, CA.

By (A) roadway classification, and (B) infrastructure distribution within each road class. Assessed using roadway centreline-km, with infrastructure classification determined by the most protective element present along each road segment.

plot_yearly_len_road(
    toron,
    title = "Roadways with Dedicated Cycling Infrastructure (Toronto, CA)"
)

Supplementary Figure 7: A comparative analysis between municipal data and verified data on the installation years for cycling infrastructure in Vancouver, CA.

# Create the plot
sfig7 <- plot_yearly_diff(
    vanc %>% filter(
        is.na(no_verify_install_type)
    ),
    title = "Difference in Installation Years, Comparing City Data and Verified Data: Vancouver, CA",
    out_data = TRUE
)

# Calc metrics for description
sfig7_n <- sum(sfig7$data$n)
sfig7_0 <- sfig7$data %>%  # perc correct
    filter(year_diff == 0) %>%
    pull(n_perc) %>%
    round(1)
sfig7_pm1 <- sfig7$data %>% # perc plus/minus 1
    filter(year_diff >= -1 & year_diff <= 1) %>%
    pull(n_perc) %>%
    sum %>%
    round(1)

Any data where a city provided and verified installation years were missing or the verified year occurred earlier or equal to the start of the study period (2009) has been excluded from analysis, yielding n=251 segments. The graph shows that 83.3% of the included segments had the correct installation year as per the city’s data, and 97.2% were accurate within a range of ±1 year.

sfig7$plot

Supplementary Figure 8: A comparative analysis between municipal data and verified data on the installation years for cycling infrastructure in Calgary, CA.

# # Create the plot
# sfig8 <- plot_yearly_diff(
#   calg,
#   title = "Difference in Installation Years, Comparing City Data and Verified Data: Calgary, CA",
#   out_data = TRUE
# )
# 
# # Calc metrics for description
# sfig8_n <- sum(sfig8$data$n)
# sfig8_0 <- sfig8$data %>%  # perc correct
#   filter(year_diff == 0) %>%
#   pull(n_perc) %>%
#   round(1)
# sfig8_pm1 <- sfig8$data %>% # perc plus/minus 1
#   filter(year_diff >= -1 & year_diff <= 1) %>%
#   pull(n_perc) %>%
#   sum %>%
#   round(1)

Any data where a city provided and verified installation years were missing or the verified year occurred earlier or equal to the start of the study period (2009) has been excluded from analysis, yielding n=sfig8_n segments. The graph shows that sfig8_0% of the included segments had the correct installation year as per the city’s data, and sfig8_pm1% were accurate within a range of ±1 year.

#sfig8$plot

Supplementary Figure 9: A comparative analysis between municipal data and verified data on the installation years for cycling infrastructure in Toronto, CA.

# Create the plot
sfig9 <- plot_yearly_diff(
    toron,
    title = "Difference in Installation Years, Comparing City Data and Verified Data: Toronto, CA",
    out_data = TRUE
)

# Calc metrics for description
sfig9_n <- sum(sfig9$data$n)
sfig9_0 <- sfig9$data %>%  # perc correct
    filter(year_diff == 0) %>%
    pull(n_perc) %>%
    round(1)
sfig9_pm1 <- sfig9$data %>% # perc plus/minus 1
    filter(year_diff >= -1 & year_diff <= 1) %>%
    pull(n_perc) %>%
    sum %>%
    round(1)

Any data where a city provided and verified installation years were missing or the verified year occurred earlier or equal to the start of the study period (2009) has been excluded from analysis, yielding n=188 segments. The graph shows that 74.5% of the included segments had the correct installation year as per the city’s data, and 78.2% were accurate within a range of ±1 year.

sfig9$plot

Appendix 2 - Methodology

Segment Inclusion Criteria for Vancouver

diag_criteria_details(criteria_data, "vancouver")

Segment Inclusion Criteria for Calgary

#diag_criteria_details(criteria_data, "calgary")

Segment Inclusion Criteria for Toronto

diag_criteria_details(criteria_data, "toronto")

Contributions

Richard Wen developed reproducible R code and organized the data based on Konrad Samsel’s draft manuscript and previous R code. Konrad Samsel prepared draft manuscript, raw data, and provided consultation on data and methods.

Acknowledgements

Linda Rothman and Brice Batomen provided supervision, project administration, resources, funding, and review/editing for the draft manuscript.

Software and Package Versions

R and R package versions:

## R version 4.3.3 (2024-02-29)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.6.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Toronto
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] units_0.8-5       prettymapr_0.2.5  ggspatial_1.1.9   tmap_3.3-4       
##  [5] sf_1.0-16         rsvg_2.6.0        magick_2.8.3      webshot2_0.1.1   
##  [9] DiagrammeRsvg_0.1 DiagrammeR_1.0.11 patchwork_1.2.0   scales_1.3.0     
## [13] ggtext_0.1.2      readxl_1.4.3      glue_1.7.0        lubridate_1.9.3  
## [17] forcats_1.0.0     stringr_1.5.1     dplyr_1.1.2       purrr_1.0.1      
## [21] readr_2.1.4       tidyr_1.3.0       tibble_3.2.1      ggplot2_3.5.0    
## [25] tidyverse_2.0.0   knitr_1.43        bookdown_0.38     rmarkdown_2.26   
## 
## loaded via a namespace (and not attached):
##  [1] DBI_1.2.2               tmaptools_3.1-1         s2_1.1.6               
##  [4] rlang_1.1.3             magrittr_2.0.3          e1071_1.7-14           
##  [7] compiler_4.3.3          png_0.1-8               vctrs_0.6.5            
## [10] crayon_1.5.2            wk_0.9.1                pkgconfig_2.0.3        
## [13] fastmap_1.1.1           ellipsis_0.3.2          labeling_0.4.3         
## [16] lwgeom_0.2-14           leafem_0.2.3            utf8_1.2.4             
## [19] promises_1.2.1          tzdb_0.4.0              ps_1.7.5               
## [22] bit_4.0.5               xfun_0.42               cachem_1.0.8           
## [25] jsonlite_1.8.8          rosm_0.3.0              highr_0.10             
## [28] later_1.3.1             terra_1.7-71            parallel_4.3.3         
## [31] R6_2.5.1                bslib_0.5.0             stringi_1.8.3          
## [34] RColorBrewer_1.1-3      jquerylib_0.1.4         cellranger_1.1.0       
## [37] stars_0.6-4             Rcpp_1.0.12             base64enc_0.1-3        
## [40] leaflet.providers_2.0.0 timechange_0.3.0        tidyselect_1.2.0       
## [43] rstudioapi_0.15.0       dichromat_2.0-0.1       abind_1.4-5            
## [46] yaml_2.3.7              codetools_0.2-19        websocket_1.4.1        
## [49] curl_5.2.1              processx_3.8.2          plyr_1.8.9             
## [52] lattice_0.22-5          leafsync_0.1.0          withr_3.0.0            
## [55] evaluate_0.21           proxy_0.4-27            xml2_1.3.6             
## [58] pillar_1.9.0            KernSmooth_2.23-22      generics_0.1.3         
## [61] vroom_1.6.3             sp_2.1-3                chromote_0.2.0         
## [64] hms_1.1.3               munsell_0.5.0           class_7.3-22           
## [67] tools_4.3.3             visNetwork_2.1.2        XML_3.99-0.16.1        
## [70] grid_4.3.3              crosstalk_1.2.1         colorspace_2.1-0       
## [73] raster_3.6-26           cli_3.6.2               fansi_1.0.6            
## [76] viridisLite_0.4.2       V8_4.4.2                gtable_0.3.4           
## [79] sass_0.4.7              digest_0.6.33           classInt_0.4-10        
## [82] farver_2.1.1            htmlwidgets_1.6.4       htmltools_0.5.7        
## [85] lifecycle_1.0.4         leaflet_2.2.1           bit64_4.0.5            
## [88] gridtext_0.1.5

RStudio version:

## $citation
## To cite RStudio in publications use:
## 
##   Posit team (2024). RStudio: Integrated Development Environment for R.
##   Posit Software, PBC, Boston, MA. URL http://www.posit.co/.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {RStudio: Integrated Development Environment for R},
##     author = {{Posit team}},
##     organization = {Posit Software, PBC},
##     address = {Boston, MA},
##     year = {2024},
##     url = {http://www.posit.co/},
##   }
## 
## $mode
## [1] "desktop"
## 
## $version
## [1] '2023.12.1.402'
## 
## $long_version
## [1] "2023.12.1+402"
## 
## $release_name
## [1] "Ocean Storm"